diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..ac8f3fb --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +# This file will be ignored - see http://flake8.pycqa.org/en/2.6.0/config.html#per-project +# Edit the [flake8] section in tox.ini instead diff --git a/.travis.yml b/.travis.yml index a3627fd..8f313d3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,10 +3,13 @@ language: python cache: directories: - .tox + - .pytest_cache - tests/testdata python: - - "2.7" - "3.5" + - "3.6" + - "3.7" + - "3.8" install: - pip install tox-travis - sh get_testdata.sh diff --git a/README.md b/README.md index c31ed89..d3f2eaf 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,14 @@ Read proprietary file formats from electrochemical test stations Use the `MPRfile` class from BioLogic.py (exported in the main package) +```` +from galvani import BioLogic +import pandas as pd + +mpr_file = BioLogic.MPRfile('test.mpr') +df = pd.DataFrame(mpr_file.data) +```` + ## Arbin .res files ## Use the res2sqlite.py script to convert the .res file to a sqlite3 database diff --git a/galvani/BioLogic.py b/galvani/BioLogic.py index e5bdb17..5e90f19 100644 --- a/galvani/BioLogic.py +++ b/galvani/BioLogic.py @@ -3,25 +3,16 @@ __all__ = ['MPTfileCSV', 'MPTfile'] -import sys import re import csv from os import SEEK_SET import time from datetime import date, datetime, timedelta -from collections import OrderedDict +from collections import defaultdict, OrderedDict import numpy as np -if sys.version_info.major <= 2: - str3 = str - from string import maketrans -else: - str3 = lambda b: str(b, encoding='ascii') - maketrans = bytes.maketrans - - def fieldname_to_dtype(fieldname): """Converts a column header from the MPT file into a tuple of canonical name and appropriate numpy dtype""" @@ -48,13 +39,13 @@ def fieldname_to_dtype(fieldname): raise ValueError("Invalid column header: %s" % fieldname) -def comma_converter(float_string): - """Convert numbers to floats whether the decimal point is '.' or ','""" - trans_table = maketrans(b',', b'.') - return float(float_string.translate(trans_table)) +def comma_converter(float_text): + """Convert text to float whether the decimal point is '.' or ','""" + trans_table = bytes.maketrans(b',', b'.') + return float(float_text.translate(trans_table)) -def MPTfile(file_or_path): +def MPTfile(file_or_path, encoding='ascii'): """Opens .mpt files as numpy record arrays Checks for the correct headings, skips any comments and returns a @@ -70,19 +61,20 @@ def MPTfile(file_or_path): if magic != b'EC-Lab ASCII FILE\r\n': raise ValueError("Bad first line for EC-Lab file: '%s'" % magic) - nb_headers_match = re.match(b'Nb header lines : (\d+)\s*$', next(mpt_file)) + nb_headers_match = re.match(rb'Nb header lines : (\d+)\s*$', + next(mpt_file)) nb_headers = int(nb_headers_match.group(1)) if nb_headers < 3: raise ValueError("Too few header lines: %d" % nb_headers) - ## The 'magic number' line, the 'Nb headers' line and the column headers - ## make three lines. Every additional line is a comment line. + # The 'magic number' line, the 'Nb headers' line and the column headers + # make three lines. Every additional line is a comment line. 
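For context, a minimal sketch of the reworked helpers in this hunk: `comma_converter` now builds its translation table with `bytes.maketrans` and accepts either decimal separator, and `MPTfile` gains an `encoding` keyword for decoding the column headers (the .mpt path below is hypothetical):

```python
from galvani.BioLogic import MPTfile, comma_converter

# Works with either decimal separator; input is the raw bytes read from the file.
assert comma_converter(b'1,234') == 1.234
assert comma_converter(b'1.234') == 1.234

# Column headers that are not plain ASCII can now be decoded explicitly.
data, comments = MPTfile('example.mpt', encoding='latin1')
```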
comments = [next(mpt_file) for i in range(nb_headers - 3)] - fieldnames = str3(next(mpt_file)).strip().split('\t') + fieldnames = next(mpt_file).decode(encoding).strip().split('\t') record_type = np.dtype(list(map(fieldname_to_dtype, fieldnames))) - ## Must be able to parse files where commas are used for decimal points + # Must be able to parse files where commas are used for decimal points converter_dict = dict(((i, comma_converter) for i in range(len(fieldnames)))) mpt_array = np.loadtxt(mpt_file, dtype=record_type, @@ -107,13 +99,13 @@ def MPTfileCSV(file_or_path): if magic.rstrip() != 'EC-Lab ASCII FILE': raise ValueError("Bad first line for EC-Lab file: '%s'" % magic) - nb_headers_match = re.match('Nb header lines : (\d+)\s*$', next(mpt_file)) + nb_headers_match = re.match(r'Nb header lines : (\d+)\s*$', next(mpt_file)) nb_headers = int(nb_headers_match.group(1)) if nb_headers < 3: raise ValueError("Too few header lines: %d" % nb_headers) - ## The 'magic number' line, the 'Nb headers' line and the column headers - ## make three lines. Every additional line is a comment line. + # The 'magic number' line, the 'Nb headers' line and the column headers + # make three lines. Every additional line is a comment line. comments = [next(mpt_file) for i in range(nb_headers - 3)] mpt_csv = csv.DictReader(mpt_file, dialect='excel-tab') @@ -143,86 +135,123 @@ VMPmodule_hdr = np.dtype([('shortname', 'S10'), ('version', ' ?? + 9: ('Ece/V', '/mA', '/V', '/V', '/V', ' 1: + unique_field_name = '%s %d' % (field_name, count) else: - raise NotImplementedError("flag %d not implemented" % colID) - elif colID == 4: - dtype_dict['time/s'] = ', I don't see the difference - elif colID in (6, 77): - dtype_dict['Ewe/V'] = ', 8 is either I or ?? - elif colID in (8, 76): - dtype_dict['I/mA'] = ' 40000 and ole_timestamp1 < 50000: ole_timestamp = ole_timestamp1 elif ole_timestamp2 > 40000 and ole_timestamp2 < 50000: ole_timestamp = ole_timestamp2 elif ole_timestamp3 > 40000 and ole_timestamp3 < 50000: ole_timestamp = ole_timestamp3 + elif ole_timestamp4 > 40000 and ole_timestamp4 < 50000: + ole_timestamp = ole_timestamp4 + else: raise ValueError("Could not find timestamp in the LOG module") @@ -357,17 +412,14 @@ class MPRfile: ole_timedelta = timedelta(days=ole_timestamp[0]) self.timestamp = ole_base + ole_timedelta if self.startdate != self.timestamp.date(): - raise ValueError("""Date mismatch: - Start date: %s - End date: %s - Timestamp: %s""" % (self.startdate, self.enddate, self.timestamp)) + raise ValueError("Date mismatch:\n" + + " Start date: %s\n" % self.startdate + + " End date: %s\n" % self.enddate + + " Timestamp: %s\n" % self.timestamp) def get_flag(self, flagname): if flagname in self.flags_dict: mask, dtype = self.flags_dict[flagname] return np.array(self.data['flags'] & mask, dtype=dtype) - elif flagname in self.flags2_dict: - mask, dtype = self.flags2_dict[flagname] - return np.array(self.data['flags2'] & mask, dtype=dtype) else: raise AttributeError("Flag '%s' not present" % flagname) diff --git a/galvani/__init__.py b/galvani/__init__.py index 1949033..ce98e1d 100644 --- a/galvani/__init__.py +++ b/galvani/__init__.py @@ -1 +1,3 @@ -from .BioLogic import MPTfile, MPRfile +from .BioLogic import MPRfile, MPTfile + +__all__ = ['MPRfile', 'MPTfile'] diff --git a/scripts/res2sqlite.py b/galvani/res2sqlite.py similarity index 67% rename from scripts/res2sqlite.py rename to galvani/res2sqlite.py index c42c16a..eff978a 100755 --- a/scripts/res2sqlite.py +++ b/galvani/res2sqlite.py @@ -7,8 +7,8 @@ import csv 
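Tying the BioLogic.py changes above together, a short usage sketch: the parsed data is a numpy record array (as in the README example), and flag columns packed into the `flags` byte are read back through `get_flag`. This assumes the hypothetical file actually contains an 'ox/red' flag and a LOG-module timestamp:

```python
import pandas as pd
from galvani import MPRfile

mpr = MPRfile('test.mpr')             # hypothetical path, as in the README example
df = pd.DataFrame(mpr.data)           # record array -> DataFrame
print(mpr.startdate, mpr.timestamp)   # dates parsed from the VMP settings/LOG modules
ox_red = mpr.get_flag('ox/red')       # unpacked from the 'flags' column; raises AttributeError if absent
```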
import argparse -## The following scripts are adapted from the result of running -## $ mdb-schema oracle +# The following scripts are adapted from the result of running +# $ mdb-schema oracle mdb_tables = ["Version_Table", "Global_Table", "Resume_Table", "Channel_Normal_Table", "Channel_Statistic_Table", @@ -126,7 +126,8 @@ CREATE TABLE Channel_Statistic_Table -- Version 1.14 ends here, version 5.23 continues Charge_Time REAL DEFAULT NULL, Discharge_Time REAL DEFAULT NULL, - FOREIGN KEY (Test_ID, Data_Point) REFERENCES Channel_Normal_Table (Test_ID, Data_Point) + FOREIGN KEY (Test_ID, Data_Point) + REFERENCES Channel_Normal_Table (Test_ID, Data_Point) ); """, "Auxiliary_Table": """ CREATE TABLE Auxiliary_Table @@ -137,7 +138,8 @@ CREATE TABLE Auxiliary_Table Data_Type INTEGER, X REAL, "dX/dt" REAL, - FOREIGN KEY (Test_ID, Data_Point) REFERENCES Channel_Normal_Table (Test_ID, Data_Point) + FOREIGN KEY (Test_ID, Data_Point) + REFERENCES Channel_Normal_Table (Test_ID, Data_Point) ); """, "Event_Table": """ CREATE TABLE Event_Table @@ -220,9 +222,10 @@ CREATE TABLE Smart_Battery_Data_Table ChargingCurrent REAL DEFAULT NULL, ChargingVoltage REAL DEFAULT NULL, ManufacturerData REAL DEFAULT NULL, - FOREIGN KEY (Test_ID, Data_Point) REFERENCES Channel_Normal_Table (Test_ID, Data_Point) + FOREIGN KEY (Test_ID, Data_Point) + REFERENCES Channel_Normal_Table (Test_ID, Data_Point) ); """, - ## The following tables are not present in version 1.14 + # The following tables are not present in version 1.14 'MCell_Aci_Data_Table': """ CREATE TABLE MCell_Aci_Data_Table ( @@ -233,7 +236,8 @@ CREATE TABLE MCell_Aci_Data_Table Phase_Shift REAL, Voltage REAL, Current REAL, - FOREIGN KEY (Test_ID, Data_Point) REFERENCES Channel_Normal_Table (Test_ID, Data_Point) + FOREIGN KEY (Test_ID, Data_Point) + REFERENCES Channel_Normal_Table (Test_ID, Data_Point) );""", 'Aux_Global_Data_Table': """ CREATE TABLE Aux_Global_Data_Table @@ -288,7 +292,8 @@ CREATE TABLE Smart_Battery_Clock_Stretch_Table VCELL3 INTEGER, VCELL2 INTEGER, VCELL1 INTEGER, - FOREIGN KEY (Test_ID, Data_Point) REFERENCES Channel_Normal_Table (Test_ID, Data_Point) + FOREIGN KEY (Test_ID, Data_Point) + REFERENCES Channel_Normal_Table (Test_ID, Data_Point) );"""} mdb_create_indices = { @@ -306,18 +311,21 @@ CREATE TEMPORARY TABLE capacity_helper( Discharge_Capacity REAL NOT NULL, Charge_Energy REAL NOT NULL, Discharge_Energy REAL NOT NULL, - FOREIGN KEY (Test_ID, Cycle_Index) REFERENCES Channel_Normal_Table (Test_ID, Cycle_Index) + FOREIGN KEY (Test_ID, Cycle_Index) + REFERENCES Channel_Normal_Table (Test_ID, Cycle_Index) ); -INSERT INTO capacity_helper - SELECT Test_ID, Cycle_Index, max(Charge_Capacity), max(Discharge_Capacity), max(Charge_Energy), max(Discharge_Energy) - FROM Channel_Normal_Table +INSERT INTO capacity_helper + SELECT Test_ID, Cycle_Index, + max(Charge_Capacity), max(Discharge_Capacity), + max(Charge_Energy), max(Discharge_Energy) + FROM Channel_Normal_Table GROUP BY Test_ID, Cycle_Index; --- ## Alternative way of selecting ## --- select * --- from Channel_Normal_Table as a join Channel_Normal_Table as b --- on (a.Test_ID = b.Test_ID and a.Data_Point = b.Data_Point + 1 +-- ## Alternative way of selecting ## +-- select * +-- from Channel_Normal_Table as a join Channel_Normal_Table as b +-- on (a.Test_ID = b.Test_ID and a.Data_Point = b.Data_Point + 1 -- and a.Charge_Capacity < b.Charge_Capacity); DROP TABLE IF EXISTS Capacity_Sum_Table; @@ -328,12 +336,15 @@ CREATE TABLE Capacity_Sum_Table( Discharge_Capacity_Sum REAL NOT NULL, 
Charge_Energy_Sum REAL NOT NULL, Discharge_Energy_Sum REAL NOT NULL, - FOREIGN KEY (Test_ID, Cycle_Index) REFERENCES Channel_Normal_Table (Test_ID, Cycle_Index) + FOREIGN KEY (Test_ID, Cycle_Index) + REFERENCES Channel_Normal_Table (Test_ID, Cycle_Index) ); -INSERT INTO Capacity_Sum_Table - SELECT a.Test_ID, a.Cycle_Index, total(b.Charge_Capacity), total(b.Discharge_Capacity), total(b.Charge_Energy), total(b.Discharge_Energy) - FROM capacity_helper AS a LEFT JOIN capacity_helper AS b +INSERT INTO Capacity_Sum_Table + SELECT a.Test_ID, a.Cycle_Index, + total(b.Charge_Capacity), total(b.Discharge_Capacity), + total(b.Charge_Energy), total(b.Discharge_Energy) + FROM capacity_helper AS a LEFT JOIN capacity_helper AS b ON (a.Test_ID = b.Test_ID AND a.Cycle_Index > b.Cycle_Index) GROUP BY a.Test_ID, a.Cycle_Index; @@ -342,95 +353,133 @@ DROP TABLE capacity_helper; CREATE VIEW IF NOT EXISTS Capacity_View AS SELECT Test_ID, Data_Point, Test_Time, Step_Time, DateTime, Step_Index, Cycle_Index, Current, Voltage, "dV/dt", - Discharge_Capacity + Discharge_Capacity_Sum - Charge_Capacity - Charge_Capacity_Sum AS Net_Capacity, - Discharge_Capacity + Discharge_Capacity_Sum + Charge_Capacity + Charge_Capacity_Sum AS Gross_Capacity, - Discharge_Energy + Discharge_Energy_Sum - Charge_Energy - Charge_Energy_Sum AS Net_Energy, - Discharge_Energy + Discharge_Energy_Sum + Charge_Energy + Charge_Energy_Sum AS Gross_Energy + ( (Discharge_Capacity + Discharge_Capacity_Sum) + - (Charge_Capacity + Charge_Capacity_Sum) ) AS Net_Capacity, + ( (Discharge_Capacity + Discharge_Capacity_Sum) + + (Charge_Capacity + Charge_Capacity_Sum) ) AS Gross_Capacity, + ( (Discharge_Energy + Discharge_Energy_Sum) + - (Charge_Energy + Charge_Energy_Sum) ) AS Net_Energy, + ( (Discharge_Energy + Discharge_Energy_Sum) + + (Charge_Energy + Charge_Energy_Sum) ) AS Gross_Energy FROM Channel_Normal_Table NATURAL JOIN Capacity_Sum_Table; """ -def mdb_get_data_text(filename, table): +def mdb_get_data_text(s3db, filename, table): print("Reading %s..." % table) + insert_pattern = re.compile( + r'INSERT INTO "\w+" \([^)]+?\) VALUES \(("[^"]*"|[^")])+?\);\n', + re.IGNORECASE + ) try: - mdb_sql = sp.Popen(['mdb-export', '-I', 'postgres', filename, table], - bufsize=-1, stdin=None, stdout=sp.PIPE, - universal_newlines=True) - mdb_output = mdb_sql.stdout.read() - while len(mdb_output) > 0: - insert_match = re.match(r'INSERT INTO "\w+" \([^)]+?\) VALUES \(("[^"]*"|[^")])+?\);\n', - mdb_output, re.IGNORECASE) - s3db.execute(insert_match.group()) - mdb_output = mdb_output[insert_match.end():] - s3db.commit() - except: + # Initialize values to avoid NameError in except clause + mdb_output = '' + insert_match = None + with sp.Popen(['mdb-export', '-I', 'postgres', filename, table], + bufsize=-1, stdin=sp.DEVNULL, stdout=sp.PIPE, + universal_newlines=True) as mdb_sql: + + mdb_output = mdb_sql.stdout.read() + while len(mdb_output) > 0: + insert_match = insert_pattern.match(mdb_output) + s3db.execute(insert_match.group()) + mdb_output = mdb_output[insert_match.end():] + mdb_output += mdb_sql.stdout.read() + s3db.commit() + + except OSError as e: + if e.errno == 2: + raise RuntimeError('Could not locate the `mdb-export` executable. 
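Once a .res file has been converted, the `Capacity_View` defined above behaves like an ordinary table; a small sketch of pulling per-cycle capacity totals with the standard-library sqlite3 module (the database file name is hypothetical):

```python
import sqlite3

conn = sqlite3.connect('cell01.sqlite')   # hypothetical output of the converter
rows = conn.execute(
    'SELECT Cycle_Index, MAX(Net_Capacity), MAX(Gross_Capacity) '
    'FROM Capacity_View GROUP BY Cycle_Index;'
).fetchall()
for cycle, net_cap, gross_cap in rows:
    print(cycle, net_cap, gross_cap)
conn.close()
```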
' + 'Check that mdbtools is properly installed.') + else: + raise + except BaseException: print("Error while importing %s" % table) - print("Remaining mdb-export output:", mdb_output) + if mdb_output: + print("Remaining mdb-export output:", mdb_output) if insert_match: print("insert_re match:", insert_match) raise - finally: - mdb_sql.terminate() -def mdb_get_data_numeric(filename, table): +def mdb_get_data_numeric(s3db, filename, table): print("Reading %s..." % table) try: - mdb_sql = sp.Popen(['mdb-export', filename, table], - bufsize=-1, stdin=None, stdout=sp.PIPE, - universal_newlines=True) - mdb_csv = csv.reader(mdb_sql.stdout) - mdb_headers = next(mdb_csv) - quoted_headers = ['"%s"' % h for h in mdb_headers] - joined_headers = ', '.join(quoted_headers) - joined_placemarks = ', '.join(['?' for h in mdb_headers]) - insert_stmt = 'INSERT INTO "{0}" ({1}) VALUES ({2});'.format(table, - joined_headers, joined_placemarks) - s3db.executemany(insert_stmt, mdb_csv) - s3db.commit() - finally: - mdb_sql.terminate() + with sp.Popen(['mdb-export', filename, table], + bufsize=-1, stdin=sp.DEVNULL, stdout=sp.PIPE, + universal_newlines=True) as mdb_sql: + mdb_csv = csv.reader(mdb_sql.stdout) + mdb_headers = next(mdb_csv) + quoted_headers = ['"%s"' % h for h in mdb_headers] + joined_headers = ', '.join(quoted_headers) + joined_placemarks = ', '.join(['?' for h in mdb_headers]) + insert_stmt = 'INSERT INTO "{0}" ({1}) VALUES ({2});'.format( + table, + joined_headers, + joined_placemarks, + ) + s3db.executemany(insert_stmt, mdb_csv) + s3db.commit() + except OSError as e: + if e.errno == 2: + raise RuntimeError('Could not locate the `mdb-export` executable. ' + 'Check that mdbtools is properly installed.') + else: + raise -def mdb_get_data(filename, table): +def mdb_get_data(s3db, filename, table): if table in mdb_tables_text: - mdb_get_data_text(filename, table) + mdb_get_data_text(s3db, filename, table) elif table in mdb_tables_numeric: - mdb_get_data_numeric(filename, table) + mdb_get_data_numeric(s3db, filename, table) else: raise ValueError("'%s' is in neither mdb_tables_text nor mdb_tables_numeric" % table) -## Main part of the script +def convert_arbin_to_sqlite(input_file, output_file): + """Read data from an Arbin .res data file and write to a sqlite file. -parser = argparse.ArgumentParser(description="Convert Arbin .res files to sqlite3 databases using mdb-export") -parser.add_argument('input_file', type=str) # need file name to pass to sp.Popen -parser.add_argument('output_file', type=str) # need file name to pass to sqlite3.connect + Any data currently in the sqlite file will be erased! + """ + s3db = sqlite3.connect(output_file) -args = parser.parse_args() + for table in reversed(mdb_tables + mdb_5_23_tables): + s3db.execute('DROP TABLE IF EXISTS "%s";' % table) -s3db = sqlite3.connect(args.output_file) - - -for table in reversed(mdb_tables + mdb_5_23_tables): - s3db.execute('DROP TABLE IF EXISTS "%s";' % table) - -for table in mdb_tables: - s3db.executescript(mdb_create_scripts[table]) - mdb_get_data(args.input_file, table) - if table in mdb_create_indices: - print("Creating indices for %s..." 
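With the script body refactored into `convert_arbin_to_sqlite`, the conversion can now be driven from Python as well as from the command line; a sketch, assuming mdbtools (`mdb-export`) is on the PATH and using hypothetical file names:

```python
from galvani.res2sqlite import convert_arbin_to_sqlite

# Requires the `mdb-export` binary from mdbtools; existing tables in the
# output database are dropped and recreated.
convert_arbin_to_sqlite('cell01.res', 'cell01.sqlite')
```

The same conversion should also be reachable from a shell as `python -m galvani.res2sqlite cell01.res cell01.sqlite` once the `main()` entry point below is in place.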
% table) - s3db.executescript(mdb_create_indices[table]) - -if (s3db.execute("SELECT Version_Schema_Field FROM Version_Table;").fetchone()[0] == "Results File 5.23"): - for table in mdb_5_23_tables: + for table in mdb_tables: s3db.executescript(mdb_create_scripts[table]) - mdb_get_data(args.input_file, table) + mdb_get_data(s3db, input_file, table) if table in mdb_create_indices: + print("Creating indices for %s..." % table) s3db.executescript(mdb_create_indices[table]) -print("Creating helper table for capacity and energy totals...") -s3db.executescript(helper_table_script) + csr = s3db.execute("SELECT Version_Schema_Field FROM Version_Table;") + version_text, = csr.fetchone() + if (version_text == "Results File 5.23"): + for table in mdb_5_23_tables: + s3db.executescript(mdb_create_scripts[table]) + mdb_get_data(input_file, table) + if table in mdb_create_indices: + s3db.executescript(mdb_create_indices[table]) -print("Vacuuming database...") -s3db.executescript("VACUUM; ANALYZE;") + print("Creating helper table for capacity and energy totals...") + s3db.executescript(helper_table_script) + + print("Vacuuming database...") + s3db.executescript("VACUUM; ANALYZE;") + + +def main(argv=None): + parser = argparse.ArgumentParser( + description="Convert Arbin .res files to sqlite3 databases using mdb-export", + ) + parser.add_argument('input_file', type=str) # need file name to pass to sp.Popen + parser.add_argument('output_file', type=str) # need file name to pass to sqlite3.connect + + args = parser.parse_args(argv) + convert_arbin_to_sqlite(args.input_file, args.output_file) + + +if __name__ == '__main__': + main() diff --git a/get_testdata.sh b/get_testdata.sh index 639930e..792223a 100755 --- a/get_testdata.sh +++ b/get_testdata.sh @@ -7,20 +7,21 @@ mkdir -p tests/testdata cd tests/testdata /usr/bin/wget --continue -i - </mA", "(Q-Qo)/mA.h", "x"]) + assert comments == [] + assert mpt1.fieldnames == [ + "mode", "ox/red", "error", "control changes", "Ns changes", + "counter inc.", "time/s", "control/V/mA", "Ewe/V", "dq/mA.h", "P/W", + "/mA", "(Q-Qo)/mA.h", "x", + ] -@raises(ValueError) -def test_open_MPT_csv_fails_for_bad_file(): - mpt1 = MPTfileCSV(os.path.join(testdata_dir, 'bio_logic1.mpr')) +def test_open_MPT_csv_fails_for_bad_file(testdata_dir): + with pytest.raises((ValueError, UnicodeDecodeError)): + MPTfileCSV(os.path.join(testdata_dir, 'bio_logic1.mpr')) -def test_open_MPR1(): - mpr1 = MPRfile(os.path.join(testdata_dir, 'bio_logic1.mpr')) - ## Check the dates as a basic test that it has been read properly - eq_(mpr1.startdate, date(2011, 10, 29)) - eq_(mpr1.enddate, date(2011, 10, 31)) +def test_colID_map_uniqueness(): + """Check some uniqueness properties of the VMPdata_colID_xyz maps.""" + field_colIDs = set(BioLogic.VMPdata_colID_dtype_map.keys()) + flag_colIDs = set(BioLogic.VMPdata_colID_flag_map.keys()) + field_names = [v[0] for v in BioLogic.VMPdata_colID_dtype_map.values()] + flag_names = [v[0] for v in BioLogic.VMPdata_colID_flag_map.values()] + assert not field_colIDs.intersection(flag_colIDs) + # 'I/mA' and 'dQ/mA.h' are duplicated + # assert len(set(field_names)) == len(field_names) + assert len(set(flag_names)) == len(flag_names) + assert not set(field_names).intersection(flag_names) -def test_open_MPR2(): - mpr2 = MPRfile(os.path.join(testdata_dir, 'bio_logic2.mpr')) - ## Check the dates as a basic test that it has been read properly - eq_(mpr2.startdate, date(2012, 9, 27)) - eq_(mpr2.enddate, date(2012, 9, 27)) +@pytest.mark.parametrize('colIDs, expected', [ + ([1, 2, 
 3], [('flags', 'u1')]), ([4, 6], [('time/s', '<f8'
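The test suite above drops the nose helpers (`eq_`, `@raises`) in favour of plain pytest; a minimal sketch of the new idiom, with an illustrative fixture standing in for whatever conftest.py actually provides:

```python
import os
import pytest
from galvani import MPRfile

@pytest.fixture
def testdata_dir():
    # Illustrative only; the real fixture should point at the data
    # fetched by get_testdata.sh.
    return os.path.join(os.path.dirname(__file__), 'testdata')

def test_open_MPR_fails_for_bad_file(testdata_dir):
    # An .mpt text file is not a valid .mpr, so MPRfile should refuse it.
    with pytest.raises(ValueError):
        MPRfile(os.path.join(testdata_dir, 'bio_logic1.mpt'))
```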