# Source code for cellpy.readers.instruments.pec_csv

"""pec csv-type data files"""
import logging
import os
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
from dateutil.parser import parse

from cellpy.parameters.internal_settings import get_headers_normal
from cellpy.readers.core import Data, FileID, humanize_bytes
from cellpy.readers.instruments.base import BaseLoader

# Maps cellpy header keys to the (cleaned) column names found in PEC csv files.
# Entries marked "This might change" are overwritten at load time by
# DataLoader._get_pec_units / DataLoader._get_pec_times once the actual units
# used in the file are known.
pec_headers_normal = {
    "step_index_txt": "Step",
    "cycle_index_txt": "Cycle",
    "test_time_txt": "Total_Time_Seconds",  # This might change
    "step_time_txt": "Step_Time_Seconds",  # This might change
    "datetime_txt": "Real_Time",
    "voltage_txt": "Voltage_mV",  # This might change
    "current_txt": "Current_mA",  # This might change
    "charge_capacity_txt": "Charge_Capacity_mAh",
    "discharge_capacity_txt": "Discharge_Capacity_mAh",
    "charge_energy_txt": "Charge_Capacity_mWh",
    "discharge_energy_txt": "Discharge_Capacity_mWh",
    "internal_resistance_txt": "Internal_Resistance_1_mOhm",
    "test_id_txt": "Test",
}


# TODO: better reading of first part of the file (comments and headers)
#  1. find the units
#  2. convert cycle and step numbers so that they start with 1 and not 0
#  3. find user-defined variables


class DataLoader(BaseLoader):
    """Main loading class"""

    instrument_name = "pec_csv"
    raw_ext = "csv"

    def __init__(self, *args, **kwargs):
        """Set the default parser state for a PEC csv file.

        Positional and keyword arguments are accepted but not used here.
        """
        # should consider moving this to the Loader class
        self.headers_normal = get_headers_normal()

        # use this to set chunks to load
        self.current_chunk = 0

        # filled in later (pec_data by _load_pec_data, pec_settings by
        # _extract_variables)
        self.pec_data = self.pec_log = self.pec_settings = None

        # The unit of these will be read from file
        self.variable_header_keywords = ["Voltage (V)", "Current (A)"]

        # Marker lines of the RESULTS CHECK section; the number of delimiters
        # in between them is ignored when locating the data table
        self.fake_header_length = ["#RESULTS CHECK\n", "#END RESULTS CHECK\n"]

        self.pec_file_delimiter = ","

        # Number of header lines is not constant, so it is determined per file
        self.number_of_header_lines = None

        # should consider to move this to the Loader class
        self.cellpy_headers = get_headers_normal()

    # @staticmethod
    # def _get_pec_units():
    #    pec_units = dict()
    #    pec_units["voltage"] = 0.001  # V
    #    pec_units["current"] = 0.001  # A
    #    pec_units["charge"] = 0.001  # Ah
    #    pec_units["mass"] = 0.001  # g
    #    pec_units["energy"] = 0.001  # Wh

    #    return pec_units

    def _get_pec_units(self):  # Fetches units from a csv file
        # Mapping prefixes to values
        prefix = {"µ": 10**-6, "m": 10**-3, "": 1}

        # Adding the non-variable units to the return value
        pec_units = {"charge": 0.001, "mass": 0.001, "energy": 0.001}  # Ah  # g  # Wh

        # A list with all the variable keywords without any prefixes, used as search terms
        header = self.variable_header_keywords

        data = pd.read_csv(
            self.temp_file_path, skiprows=self.number_of_header_lines, nrows=1
        )

        # Searching for the prefix for all the variable units
        for item in data.keys():
            for unit in header:
                x = unit.find("(") - len(unit)
                if unit[: x + 1] in item:
                    y = item[x].replace("(", "")
                    # Adding units conversion factor to return value, renaming the headers to include correct units
                    if header.index(unit) == 0:
                        pec_units["voltage"] = prefix.get(y)
                        pec_headers_normal["voltage_txt"] = f"Voltage_{y}V"
                    elif header.index(unit) == 1:
                        pec_units["current"] = prefix.get(y)
                        pec_headers_normal["current_txt"] = f"Current_{y}A"
        return pec_units

    def _get_pec_times(self):
        # Mapping units to their conversion values
        logging.debug("retrieve pec units")
        units = {
            "(Hours in hh:mm:ss.xxx)": self.timestamp_to_seconds,
            "(Decimal Hours)": 3600,
            "(Minutes)": 60,
            "(Seconds)": 1,
        }

        data = pd.read_csv(
            self.temp_file_path, skiprows=self.number_of_header_lines, nrows=0
        )
        pec_times = dict()

        # Adds the time variables and their units to the pec_times dictonary return value
        # Also updates the column headers in pec_headers_normal with the correct name
        for item in data.keys():
            for unit in units:
                if unit in item:
                    x = item.find("(")
                    var = item[: x - 1].lower().replace(" ", "_")
                    its_unit = item[x:]
                    pec_times[var] = units.get(its_unit)
                    if var == "total_time":
                        pec_headers_normal[
                            "test_time_txt"
                        ] = f'Total_Time_{its_unit[1:-1].replace(" ", "_")}'
                    if var == "step_time":
                        pec_headers_normal[
                            "step_time_txt"
                        ] = f'Step_Time_{its_unit[1:-1].replace(" ", "_")}'
        return pec_times

[docs]    @staticmethod
    def get_raw_units():
        """Include the settings for the units used by the instrument.

        The units are defined w.r.t. the SI units ('unit-fractions'; currently only units that are multiples of
        Si units can be used). For example, for current defined in mA, the value for the
        current unit-fraction will be 0.001.

        Returns: dictionary containing the unit-fractions for current, charge, and mass

        """

        raw_units = dict()
        raw_units["current"] = "A"
        raw_units["charge"] = "Ah"
        raw_units["mass"] = "mg"
        raw_units["voltage"] = "V"
        raw_units["energy"] = "Wh"
        raw_units["time"] = "s"

        return raw_units

    @staticmethod
    def _raw_units_for_internal_calculations():
        raw_units = dict()
        raw_units["current"] = 1.0
        raw_units["charge"] = 1.0
        raw_units["mass"] = 0.001
        raw_units["voltage"] = 1.0
        raw_units["energy"] = 1.0
        raw_units["time"] = 1.0
        return raw_units

[docs]    def get_raw_limits(self):
        """Include the settings for how to decide what kind of step you are examining here.

        The raw limits are 'epsilons' used to check if the current and/or voltage is stable (for example
        for galvanostatic steps, one would expect that the current is stable (constant) and non-zero).
        It is expected that different instruments (with different resolution etc.) have different
        'epsilons'.

        Returns: the raw limits (dict)

        """
        warnings.warn("raw limits have not been subject for testing yet")
        raw_limits = dict()
        raw_limits["current_hard"] = 0.1  # There is a bug in PEC
        raw_limits["current_soft"] = 1.0
        raw_limits["stable_current_hard"] = 2.0
        raw_limits["stable_current_soft"] = 4.0
        raw_limits["stable_voltage_hard"] = 2.0
        raw_limits["stable_voltage_soft"] = 4.0
        raw_limits["stable_charge_hard"] = 2.0
        raw_limits["stable_charge_soft"] = 5.0
        raw_limits["ir_change"] = 0.00001
        return raw_limits

[docs]    def loader(self, file_name, bad_steps=None, **kwargs):
        # self.name = file_name
        # self.copy_to_temporary()
        self.number_of_header_lines = self._find_header_length()
        data = Data()
        self.generate_fid()
        data.raw_data_files.append(self.fid)

        # div parameters and information (probably load this last)
        data.loaded_from = self.name

        # some overall prms
        data.channel_index = None
        data.creator = None
        data.schedule_file_name = None
        data.test_ID = None
        data.test_name = None

        # --------- read raw-data (normal-data) -------------------------
        self._load_pec_data(bad_steps)
        data.start_datetime = self.pec_settings["start_time"]
        length_of_test = self.pec_data.shape[0]
        logging.debug(f"length of test: {length_of_test}")

        logging.debug("renaming columns")
        self._rename_headers()
        self._convert_units()

        # cycle indices should not be 0
        if 0 in self.pec_data["cycle_index"]:
            self.pec_data["cycle_index"] += 1

        data.raw = self.pec_data
        data.raw_data_files_length.append(length_of_test)

        return data

    def _load_pec_data(self, bad_steps):
        """Read the data table and the header parameters of the PEC csv file.

        The table is stored in ``self.pec_data`` with cleaned column names and
        the extra columns cellpy expects; the header lines above the table are
        handed to ``_extract_variables``.

        Args:
            bad_steps: not implemented for this instrument; anything other
                than ``None`` only triggers a warning.
        """
        if bad_steps is not None:
            warnings.warn("bad_steps is not implemented yet for this instrument")
        path = self.temp_file_path
        header_size = self.number_of_header_lines

        # ----------------- reading the data ---------------------
        table = pd.read_csv(path, skiprows=header_size)

        # get rid of unnamed columns
        is_unnamed = table.columns.str.contains("^Unnamed")
        table = table.loc[:, ~is_unnamed]

        # get rid of spaces, parenthesis, and the deg-sign
        cleanup = str.maketrans(
            {" ": "_", "(": None, ")": None, "°": None, r"%": "pct"}
        )
        cleaned_names = {name: name.translate(cleanup) for name in table.columns}
        table.rename(columns=cleaned_names, inplace=True)

        # add missing columns
        table.insert(0, self.headers_normal.data_point_txt, range(len(table)))
        table[self.headers_normal.sub_step_index_txt] = 0
        table[self.headers_normal.sub_step_time_txt] = 0

        self.pec_data = table

        # ----------------  reading the parameters ---------------
        # Only the lines above the data table are collected
        with open(path, "r") as stream:
            header_lines = [line for _, line in zip(range(header_size), stream)]
        self._extract_variables(header_lines)

    def _extract_variables(self, lines):
        """Parse the header lines and store start/end time in ``self.pec_settings``.

        Lines starting with "#" toggle a section flag; lines inside such a
        section (and the "#" lines themselves) are skipped. Every other line is
        split on "," into a variable name (first field, stripped of blanks and
        colons, spaces replaced by underscores) and a value (second field).
        Missing or empty values become ``np.nan``.

        Args:
            lines: the header lines of the csv file.

        Raises:
            KeyError: if no "Start_Time" or "End_Time" entry was found.
        """
        inside_hash_section = False
        header_comments = {}
        for line in lines:
            if line.startswith("#"):
                inside_hash_section = not inside_hash_section
                continue
            if inside_hash_section:
                continue

            fields = line.split(",")
            variable = fields[0].strip().strip(":").replace(" ", "_")
            value = fields[1].strip() if len(fields) > 1 else None
            header_comments[variable] = value if value else np.nan

        logging.debug(" Headers Dict ")
        logging.debug(header_comments)

        start_time = parse(header_comments["Start_Time"])
        end_time = parse(header_comments["End_Time"])
        # headers["test_regime_name"] = header_comments["TestRegime_Name"]

        self.pec_settings = {"start_time": start_time, "end_time": end_time}

    def _rename_headers(self):
        """Rename every PEC column listed in ``pec_headers_normal`` to its cellpy name."""
        logging.debug("Trying to rename the columns")
        for cellpy_key, pec_column in pec_headers_normal.items():
            self._rename_header(cellpy_key, pec_column)

    def _convert_units(self):
        """Cast column dtypes and scale the raw values to cellpy's internal units.

        Steps:
            1. Convert the datetime column and "Position_Start_Time" to
               datetimes and "Rack" to a categorical (these columns must be
               present in ``self.pec_data``).
            2. Read the voltage/current prefixes and the time units from the
               file; this may change ``pec_headers_normal``, so the columns
               are renamed once more afterwards.
            3. Convert total time and step time to seconds, either by
               multiplication or, for ``hh:mm:ss.xxx`` strings, by applying
               the converter function to each value.
            4. Scale voltage, current, capacities and energies.

        Raises:
            KeyError: if a required column, unit or time entry is missing.
        """
        logging.debug("Trying to convert all data into correct units")
        logging.debug("- dtypes")
        hdr = self.headers_normal

        self.pec_data[hdr.datetime_txt] = pd.to_datetime(
            self.pec_data[hdr.datetime_txt]
        )

        self.pec_data["Position_Start_Time"] = pd.to_datetime(
            self.pec_data["Position_Start_Time"]
        )

        self.pec_data["Rack"] = self.pec_data["Rack"].astype("category")

        logging.debug("- cellpy units")
        pec_units = self._get_pec_units()
        pec_times = self._get_pec_times()
        raw_units = self._raw_units_for_internal_calculations()
        # Had to run this again after fixing the headers, might be a better way to fix this
        self._rename_headers()

        _v = pec_units["voltage"] / raw_units["voltage"]
        _i = pec_units["current"] / raw_units["current"]
        _c = pec_units["charge"] / raw_units["charge"]
        _w = pec_units["energy"] / raw_units["energy"]

        # Each time entry is either a number (units proportional to seconds)
        # or a callable (hh:mm:ss.xxx strings). Every entry is applied to its
        # OWN column; previously the step-time strings were never converted
        # because the test-time column was processed a second time instead.
        time_columns = {
            "total_time": hdr.test_time_txt,
            "step_time": hdr.step_time_txt,
        }
        for time_key, column in time_columns.items():
            converter = pec_times[time_key]
            if isinstance(converter, (int, float)):
                self.pec_data[column] *= converter / raw_units["time"]
            elif callable(converter):
                # EDIT jepe 18.06.2020: change to .apply(func) instead of for-loop
                # (now the column is of float64 type and behaves properly)
                self.pec_data[column] = self.pec_data[column].apply(converter)

        self.pec_data[hdr.voltage_txt] *= _v
        self.pec_data[hdr.current_txt] *= _i

        self.pec_data[hdr.charge_capacity_txt] *= _c
        self.pec_data[hdr.discharge_capacity_txt] *= _c
        self.pec_data[hdr.charge_energy_txt] *= _w
        self.pec_data[hdr.discharge_energy_txt] *= _w

    def _rename_header(self, h_old, h_new):
        try:
            self.pec_data.rename(
                columns={h_new: self.cellpy_headers[h_old]}, inplace=True
            )
        except KeyError as e:
            logging.info(f"Problem during conversion to cellpy-format ({e})")

    def _find_header_length(self):
        skiprows = 0
        resultscheck = False  # Ignore number of delimiters inside RESULTS CHECK

        with open(self.temp_file_path, "r") as header:
            for line in header:
                if line in self.fake_header_length:
                    resultscheck = not resultscheck
                if (
                    line.count(self.pec_file_delimiter) > 1 and not resultscheck
                ):  # End when there are >2 columns
                    break
                skiprows += 1

        return skiprows

[docs]    @staticmethod
    def timestamp_to_seconds(timestamp):  # Changes hh:mm:s.xxx time format to seconds
        total_secs = 0
        # strptime can not handle more than 24 hours, days are counted manually
        hours = int(timestamp[:2])
        if hours >= 24:
            days = hours // 24
            total_secs += days * 3600 * 24
            timestamp = str(hours - 24 * days) + timestamp[2:]
        total_secs += (
            datetime.strptime(timestamp, "%H:%M:%S.%f")
            - datetime.strptime("00:00:00.000", "%H:%M:%S.%f")
        ).total_seconds()
        return total_secs


# Running this module directly does nothing: the guard body is only ``pass``.
if __name__ == "__main__":
    pass