mirror of
https://github.com/echemdata/galvani.git
synced 2025-12-13 17:05:35 +00:00
Merge remote-tracking branch 'cb-ml-evs/ml-evs/col-182-and-warning-only-mode'
Closes PR https://codeberg.org/echemdata/galvani/pulls/123 See also GitHub PR https://github.com/echemdata/galvani/pull/124
This commit is contained in:
@@ -13,9 +13,12 @@ from os import SEEK_SET
|
|||||||
import time
|
import time
|
||||||
from datetime import date, datetime, timedelta
|
from datetime import date, datetime, timedelta
|
||||||
from collections import defaultdict, OrderedDict
|
from collections import defaultdict, OrderedDict
|
||||||
|
import warnings
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
UNKNOWN_COLUMN_TYPE_HIERARCHY = ("<f8", "<f4", "<u4", "<u2", "<u1")
|
||||||
|
|
||||||
|
|
||||||
def fieldname_to_dtype(fieldname):
|
def fieldname_to_dtype(fieldname):
|
||||||
"""Converts a column header from the MPT file into a tuple of
|
"""Converts a column header from the MPT file into a tuple of
|
||||||
@@ -316,6 +319,7 @@ VMPdata_colID_dtype_map = {
|
|||||||
174: ("<Ewe>/V", "<f4"),
|
174: ("<Ewe>/V", "<f4"),
|
||||||
178: ("(Q-Qo)/C", "<f4"),
|
178: ("(Q-Qo)/C", "<f4"),
|
||||||
179: ("dQ/C", "<f4"),
|
179: ("dQ/C", "<f4"),
|
||||||
|
182: ("step time/s", "<f8"),
|
||||||
211: ("Q charge/discharge/mA.h", "<f8"),
|
211: ("Q charge/discharge/mA.h", "<f8"),
|
||||||
212: ("half cycle", "<u4"),
|
212: ("half cycle", "<u4"),
|
||||||
213: ("z cycle", "<u4"),
|
213: ("z cycle", "<u4"),
|
||||||
@@ -427,18 +431,23 @@ def parse_BioLogic_date(date_text):
|
|||||||
return date(tm.tm_year, tm.tm_mon, tm.tm_mday)
|
return date(tm.tm_year, tm.tm_mon, tm.tm_mday)
|
||||||
|
|
||||||
|
|
||||||
def VMPdata_dtype_from_colIDs(colIDs):
|
def VMPdata_dtype_from_colIDs(colIDs, error_on_unknown_column: bool = True):
|
||||||
"""Get a numpy record type from a list of column ID numbers.
|
"""Get a numpy record type from a list of column ID numbers.
|
||||||
|
|
||||||
The binary layout of the data in the MPR file is described by the sequence
|
The binary layout of the data in the MPR file is described by the sequence
|
||||||
of column ID numbers in the file header. This function converts that
|
of column ID numbers in the file header. This function converts that
|
||||||
sequence into a numpy dtype which can then be used to load data from the
|
sequence into a list that can be passed to np.dtype in order to load data from the
|
||||||
file with np.frombuffer().
|
file with np.frombuffer().
|
||||||
|
|
||||||
Some column IDs refer to small values which are packed into a single byte.
|
Some column IDs refer to small values which are packed into a single byte.
|
||||||
The second return value is a dict describing the bit masks with which to
|
The second return value is a dict describing the bit masks with which to
|
||||||
extract these columns from the flags byte.
|
extract these columns from the flags byte.
|
||||||
|
|
||||||
|
If error_on_unknown_column is True, an error will be raised if an unknown
|
||||||
|
column ID is encountered. If it is False, a warning will be emitted and attempts
|
||||||
|
will be made to read the column with a few different dtypes.
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
type_list = []
|
type_list = []
|
||||||
field_name_counts = defaultdict(int)
|
field_name_counts = defaultdict(int)
|
||||||
@@ -468,11 +477,19 @@ def VMPdata_dtype_from_colIDs(colIDs):
|
|||||||
unique_field_name = field_name
|
unique_field_name = field_name
|
||||||
type_list.append((unique_field_name, field_type))
|
type_list.append((unique_field_name, field_type))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError(
|
if error_on_unknown_column:
|
||||||
"Column ID {cid} after column {prev} "
|
raise NotImplementedError(
|
||||||
"is unknown".format(cid=colID, prev=type_list[-1][0])
|
"Column ID {cid} after column {prev} is unknown".format(
|
||||||
|
cid=colID, prev=type_list[-1][0]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
warnings.warn(
|
||||||
|
"Unknown column ID %d -- will attempt to read as common dtypes"
|
||||||
|
% colID
|
||||||
)
|
)
|
||||||
return np.dtype(type_list), flags_dict
|
type_list.append(("unknown_colID_%d" % colID, UNKNOWN_COLUMN_TYPE_HIERARCHY[0]))
|
||||||
|
|
||||||
|
return type_list, flags_dict
|
||||||
|
|
||||||
|
|
||||||
def read_VMP_modules(fileobj, read_module_data=True):
|
def read_VMP_modules(fileobj, read_module_data=True):
|
||||||
@@ -543,7 +560,17 @@ class MPRfile:
|
|||||||
enddate - The date when the experiment finished
|
enddate - The date when the experiment finished
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, file_or_path):
|
def __init__(self, file_or_path, error_on_unknown_column: bool = True):
|
||||||
|
"""Pass an EC-lab .mpr file to be parsed.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
file_or_path: Either the open file data or a path to it.
|
||||||
|
error_on_unknown_column: Whether or not to raise an error if an
|
||||||
|
unknown column ID is encountered. If False, a warning will be emitted and
|
||||||
|
the column will be added as 'unknown_colID_<colID>', with an attempt to read
|
||||||
|
it with a few different dtypes.
|
||||||
|
|
||||||
|
"""
|
||||||
self.loop_index = None
|
self.loop_index = None
|
||||||
if isinstance(file_or_path, str):
|
if isinstance(file_or_path, str):
|
||||||
mpr_file = open(file_or_path, "rb")
|
mpr_file = open(file_or_path, "rb")
|
||||||
@@ -595,8 +622,50 @@ class MPRfile:
|
|||||||
|
|
||||||
assert not any(remaining_headers)
|
assert not any(remaining_headers)
|
||||||
|
|
||||||
self.dtype, self.flags_dict = VMPdata_dtype_from_colIDs(column_types)
|
dtypes, self.flags_dict = VMPdata_dtype_from_colIDs(
|
||||||
self.data = np.frombuffer(main_data, dtype=self.dtype)
|
column_types, error_on_unknown_column=error_on_unknown_column
|
||||||
|
)
|
||||||
|
|
||||||
|
unknown_cols = []
|
||||||
|
# Iteratively work through the unknown columns and try to read them
|
||||||
|
if not error_on_unknown_column:
|
||||||
|
for col, _ in dtypes:
|
||||||
|
if col.startswith("unknown_colID"):
|
||||||
|
unknown_cols.append(col)
|
||||||
|
if len(unknown_cols) > 3:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Too many unknown columns to attempt to read combinatorially: %s"
|
||||||
|
% unknown_cols
|
||||||
|
)
|
||||||
|
|
||||||
|
if unknown_cols:
|
||||||
|
# create a list of all possible combinations of dtypes
|
||||||
|
# for the unknown columns
|
||||||
|
from itertools import product
|
||||||
|
perms = product(UNKNOWN_COLUMN_TYPE_HIERARCHY, repeat=len(unknown_cols))
|
||||||
|
for perm in perms:
|
||||||
|
for unknown_col_ind, c in enumerate(unknown_cols):
|
||||||
|
for ind, (col, _) in enumerate(dtypes):
|
||||||
|
if c == col:
|
||||||
|
dtypes[ind] = (col, perm[unknown_col_ind])
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.dtype = np.dtype(dtypes)
|
||||||
|
self.data = np.frombuffer(main_data, dtype=self.dtype)
|
||||||
|
break
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Unable to read data for unknown columns %s with any of the common dtypes %s",
|
||||||
|
unknown_cols,
|
||||||
|
UNKNOWN_COLUMN_TYPE_HIERARCHY
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.dtype = np.dtype(dtypes)
|
||||||
|
self.data = np.frombuffer(main_data, dtype=self.dtype)
|
||||||
|
|
||||||
assert self.data.shape[0] == n_data_points
|
assert self.data.shape[0] == n_data_points
|
||||||
|
|
||||||
# No idea what these 'column types' mean or even if they are actually
|
# No idea what these 'column types' mean or even if they are actually
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ def test_colID_to_dtype(colIDs, expected):
|
|||||||
return
|
return
|
||||||
expected_dtype = np.dtype(expected)
|
expected_dtype = np.dtype(expected)
|
||||||
dtype, flags_dict = BioLogic.VMPdata_dtype_from_colIDs(colIDs)
|
dtype, flags_dict = BioLogic.VMPdata_dtype_from_colIDs(colIDs)
|
||||||
assert dtype == expected_dtype
|
assert np.dtype(dtype) == expected_dtype
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|||||||
Reference in New Issue
Block a user