mirror of
https://github.com/echemdata/galvani.git
synced 2025-12-14 01:15:34 +00:00
Add mode to attempt to read files with unknown columns and only warn
This commit is contained in:
@@ -13,9 +13,12 @@ from os import SEEK_SET
|
|||||||
import time
|
import time
|
||||||
from datetime import date, datetime, timedelta
|
from datetime import date, datetime, timedelta
|
||||||
from collections import defaultdict, OrderedDict
|
from collections import defaultdict, OrderedDict
|
||||||
|
import warnings
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
UNKNOWN_COLUMN_TYPE_HIERARCHY = ("<f8", "<f4", "<u4", "<u2", "<u1")
|
||||||
|
|
||||||
|
|
||||||
def fieldname_to_dtype(fieldname):
|
def fieldname_to_dtype(fieldname):
|
||||||
"""Converts a column header from the MPT file into a tuple of
|
"""Converts a column header from the MPT file into a tuple of
|
||||||
@@ -428,18 +431,23 @@ def parse_BioLogic_date(date_text):
|
|||||||
return date(tm.tm_year, tm.tm_mon, tm.tm_mday)
|
return date(tm.tm_year, tm.tm_mon, tm.tm_mday)
|
||||||
|
|
||||||
|
|
||||||
def VMPdata_dtype_from_colIDs(colIDs):
|
def VMPdata_dtype_from_colIDs(colIDs, error_on_unknown_column: bool = True):
|
||||||
"""Get a numpy record type from a list of column ID numbers.
|
"""Get a numpy record type from a list of column ID numbers.
|
||||||
|
|
||||||
The binary layout of the data in the MPR file is described by the sequence
|
The binary layout of the data in the MPR file is described by the sequence
|
||||||
of column ID numbers in the file header. This function converts that
|
of column ID numbers in the file header. This function converts that
|
||||||
sequence into a numpy dtype which can then be used to load data from the
|
sequence into a list that can be used with numpy dtype load data from the
|
||||||
file with np.frombuffer().
|
file with np.frombuffer().
|
||||||
|
|
||||||
Some column IDs refer to small values which are packed into a single byte.
|
Some column IDs refer to small values which are packed into a single byte.
|
||||||
The second return value is a dict describing the bit masks with which to
|
The second return value is a dict describing the bit masks with which to
|
||||||
extract these columns from the flags byte.
|
extract these columns from the flags byte.
|
||||||
|
|
||||||
|
If error_on_unknown_column is True, an error will be raised if an unknown
|
||||||
|
column ID is encountered. If it is False, a warning will be emited and attempts
|
||||||
|
will be made to read the column with a few different dtypes.
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
type_list = []
|
type_list = []
|
||||||
field_name_counts = defaultdict(int)
|
field_name_counts = defaultdict(int)
|
||||||
@@ -469,11 +477,19 @@ def VMPdata_dtype_from_colIDs(colIDs):
|
|||||||
unique_field_name = field_name
|
unique_field_name = field_name
|
||||||
type_list.append((unique_field_name, field_type))
|
type_list.append((unique_field_name, field_type))
|
||||||
else:
|
else:
|
||||||
|
if error_on_unknown_column:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
"Column ID {cid} after column {prev} "
|
"Column ID {cid} after column {prev} is unknown".format(
|
||||||
"is unknown".format(cid=colID, prev=type_list[-1][0])
|
cid=colID, prev=type_list[-1][0]
|
||||||
)
|
)
|
||||||
return np.dtype(type_list), flags_dict
|
)
|
||||||
|
warnings.warn(
|
||||||
|
"Unknown column ID %d -- will attempt to read as common dtypes"
|
||||||
|
% colID
|
||||||
|
)
|
||||||
|
type_list.append(("unknown_colID_%d" % colID, UNKNOWN_COLUMN_TYPE_HIERARCHY[0]))
|
||||||
|
|
||||||
|
return type_list, flags_dict
|
||||||
|
|
||||||
|
|
||||||
def read_VMP_modules(fileobj, read_module_data=True):
|
def read_VMP_modules(fileobj, read_module_data=True):
|
||||||
@@ -544,7 +560,17 @@ class MPRfile:
|
|||||||
enddate - The date when the experiment finished
|
enddate - The date when the experiment finished
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, file_or_path):
|
def __init__(self, file_or_path, error_on_unknown_column: bool = True):
|
||||||
|
"""Pass an EC-lab .mpr file to be parsed.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
file_or_path: Either the open file data or a path to it.
|
||||||
|
error_on_unknown_column: Whether or not to raise an error if an
|
||||||
|
unknown column ID is encountered. A warning will be emited and
|
||||||
|
the column will be added 'unknown_<colID>', with an attempt to read
|
||||||
|
it with a few different dtypes.
|
||||||
|
|
||||||
|
"""
|
||||||
self.loop_index = None
|
self.loop_index = None
|
||||||
if isinstance(file_or_path, str):
|
if isinstance(file_or_path, str):
|
||||||
mpr_file = open(file_or_path, "rb")
|
mpr_file = open(file_or_path, "rb")
|
||||||
@@ -596,8 +622,41 @@ class MPRfile:
|
|||||||
|
|
||||||
assert not any(remaining_headers)
|
assert not any(remaining_headers)
|
||||||
|
|
||||||
self.dtype, self.flags_dict = VMPdata_dtype_from_colIDs(column_types)
|
dtypes, self.flags_dict = VMPdata_dtype_from_colIDs(
|
||||||
|
column_types, error_on_unknown_column=error_on_unknown_column
|
||||||
|
)
|
||||||
|
|
||||||
|
unknown_cols = []
|
||||||
|
# Iteratively work through the unknown columns and try to read them
|
||||||
|
if not error_on_unknown_column:
|
||||||
|
for col, _ in dtypes:
|
||||||
|
if col.startswith("unknown_colID"):
|
||||||
|
unknown_cols.append(col)
|
||||||
|
|
||||||
|
if unknown_cols:
|
||||||
|
# create a list of all possible combinations of dtypes
|
||||||
|
# for the unknown columns
|
||||||
|
from itertools import product
|
||||||
|
perms = product(UNKNOWN_COLUMN_TYPE_HIERARCHY, repeat=len(unknown_cols))
|
||||||
|
for perm in perms:
|
||||||
|
for unknown_col_ind, c in enumerate(unknown_cols):
|
||||||
|
for ind, (col, _) in enumerate(dtypes):
|
||||||
|
if c == col:
|
||||||
|
dtypes[ind] = (col, perm[unknown_col_ind])
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.dtype = np.dtype(dtypes)
|
||||||
self.data = np.frombuffer(main_data, dtype=self.dtype)
|
self.data = np.frombuffer(main_data, dtype=self.dtype)
|
||||||
|
break
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
raise RuntimeError("Unable to read data for unknown columns %s with any of the common dtypes %s", unknown_cols, UNKNOWN_COLUMN_TYPE_HIERARCHY)
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.dtype = np.dtype(dtypes)
|
||||||
|
self.data = np.frombuffer(main_data, dtype=self.dtype)
|
||||||
|
|
||||||
assert self.data.shape[0] == n_data_points
|
assert self.data.shape[0] == n_data_points
|
||||||
|
|
||||||
# No idea what these 'column types' mean or even if they are actually
|
# No idea what these 'column types' mean or even if they are actually
|
||||||
|
|||||||
Reference in New Issue
Block a user