# Source code for cellpy.readers.instruments.custom

"""This module is used for loading data using the `instrument="custom"` method.
If no `instrument_file` is given (either directly or through the use
of the :: separator), the default instrument file (yaml) will be used."""

# This module works, but is by no means finished. The module is meant to
# be developed further allowing for example
# to provide custom parsers. At the moment, we anticipate that it only should
# work with txt-files (so the class is called CustomTxtLoader), however, it is
# possible to extend the scope to allow for providing parsers that also can read
# binary files. The future will show.

import logging
import sys
from abc import ABC
from pathlib import Path

import pandas as pd

from cellpy import prms
from cellpy.readers.instruments.base import find_delimiter_and_start, AutoLoader
from cellpy.readers.instruments.configurations import (
    register_local_configuration_from_yaml_file,
)


class DataLoader(AutoLoader, ABC):
    """Loader for ``instrument="custom"``, configured by a yaml instrument file.

    The instrument definition file is resolved in ``__init__`` and registered
    as the local configuration in ``pre_init``. Depending on the configured
    ``file_format``, raw files are read with ``pandas.read_csv`` ("csv") or
    ``pandas.read_excel`` ("xls" and "xlsx").
    """

    instrument_name = "custom"
    raw_ext = "*"

    default_model = None
    supported_models = None

    # pandas.read_excel engine to use for each supported Excel file format.
    _excel_engines = {"xls": "xlrd", "xlsx": "openpyxl"}

    def __init__(self, instrument_file=None, **kwargs):
        """Resolve the instrument definition file and initialise the loader.

        Args:
            instrument_file: path to the instrument definition (yaml) file.
                If None, ``prms.Instruments.custom_instrument_definitions_file``
                is used. If the path is not an existing file, it is looked up
                relative to ``prms.Paths.instrumentdir``.
            **kwargs: accepted, but not forwarded to ``super().__init__()``.

        Raises:
            FileExistsError: if no instrument file is given or configured, or
                if the file is not found in either location.
        """
        # NOTE(review): FileNotFoundError would describe these failures better;
        # FileExistsError is kept so existing ``except`` clauses keep working.
        if instrument_file is None:
            logging.debug("No instrument_file provided - checking default")
            instrument_file = prms.Instruments.custom_instrument_definitions_file
        if not instrument_file:
            raise FileExistsError(
                "Missing instrument definition file "
                "(not given and not defined in config)"
            )
        if not Path(instrument_file).is_file():
            # Fall back to searching in the instruments folder.
            instrument_dir = Path(prms.Paths.instrumentdir)
            logging.debug(f"Looking for file in {instrument_dir}")
            instrument_file_in_instrument_dir = instrument_dir / instrument_file
            if not instrument_file_in_instrument_dir.is_file():
                logging.debug(f"Could not find {instrument_file_in_instrument_dir}")
                raise FileExistsError(
                    "Instrument definition file not found! " f"({instrument_file})"
                )
            instrument_file = instrument_file_in_instrument_dir

        logging.debug(f"Instrument definition file: {instrument_file}")
        self.local_instrument_file = instrument_file
        super().__init__()

    def pre_init(self):
        """Set ``auto_register_config`` to False and load the local yaml config."""
        self.auto_register_config = False
        self.config_params = register_local_configuration_from_yaml_file(
            self.local_instrument_file
        )

    # TODO: rewrite this:
    def parse_loader_parameters(self, **kwargs):
        """Run the auto-formatter if ``auto_format`` is given and truthy."""
        if kwargs.get("auto_format", False):
            self._auto_formatter()

    def _config_sub_parser(self, key_label, default_value=None, **kwargs):
        """Return the setting for ``key_label``.

        Lookup order: ``kwargs``, then ``self.config_params.formatters``,
        then ``default_value``.
        """
        return kwargs.pop(
            key_label, self.config_params.formatters.get(key_label, default_value)
        )

    # TODO: rewrite this:
    def parse_formatter_parameters(self, **kwargs):
        """Set the file format and its format-specific reader settings.

        For "csv" the attributes ``sep``, ``skiprows``, ``header``,
        ``encoding``, ``decimal`` and ``thousands`` are set; for "xls" and
        "xlsx" the attribute ``table_name`` is set. Each value is resolved
        with ``_config_sub_parser``.

        Raises:
            NotImplementedError: if ``file_format`` is anything else
                (including "json").
        """
        self.file_format = self._config_sub_parser(
            "file_format", default_value="csv", **kwargs
        )

        if self.file_format == "csv":
            # attribute name -> default used when neither kwargs nor the
            # instrument configuration provides a value
            csv_defaults = {
                "sep": None,
                "skiprows": 0,
                "header": 0,
                "encoding": "utf-8",
                "decimal": ".",
                "thousands": None,
            }
            for key, default in csv_defaults.items():
                value = self._config_sub_parser(key, default_value=default, **kwargs)
                setattr(self, key, value)

        elif self.file_format in ("xlsx", "xls"):
            self.table_name = self._config_sub_parser(
                "table_name", default_value="sheet 1", **kwargs
            )

        else:
            # Raise instead of print + sys.exit(): a library must not
            # terminate the interpreter of the calling application.
            raise NotImplementedError(f"{self.file_format} not implemented yet")

    # TODO: consider rewriting this:
    def _auto_formatter(self):
        """Detect csv settings from the file ``self.name``.

        Uses ``find_delimiter_and_start`` to get the separator and start
        index, and sets ``sep``, ``skiprows`` (start index minus one),
        ``header`` (0) and ``encoding`` ("UTF-8").
        """
        separator, first_index = find_delimiter_and_start(
            self.name,
            separators=None,
            checking_length_header=100,
            checking_length_whole=200,
        )
        self.encoding = "UTF-8"  # consider adding a find_encoding function
        self.sep = separator
        self.skiprows = first_index - 1  # consider adding a find_rows_to_skip function
        self.header = 0  # consider adding a find_header function

        # Informational only; CRITICAL is reserved for serious failures.
        logging.info(
            f"auto-formatting:\n  {self.sep=}\n  {self.skiprows=}\n  {self.header=}\n  {self.encoding=}\n"
        )

    def _read_excel_sheet(self, name, engine):
        """Return the first sheet whose name starts with ``self.table_name``.

        Args:
            name: the Excel file to read.
            engine: engine passed on to ``pandas.read_excel``.

        Raises:
            IOError: if no sheet name starts with ``self.table_name``.
        """
        logging.debug(f"parsing with pandas.read_excel using {engine}: {name}")
        sheet_name = self.table_name
        # sheet_name=None makes pandas return all sheets as a dict keyed by
        # sheet name.  TODO: replace this with pd.ExcelReader
        sheets = pd.read_excel(name, engine=engine, sheet_name=None)
        for label, frame in sheets.items():
            if label.startswith(sheet_name):
                logging.debug(f"read sheet: {label}")
                return frame
        raise IOError(f"Could not find the sheet {sheet_name} in {name}")

    def query_file(self, name):
        """Read the raw file ``name`` according to ``self.file_format``.

        Args:
            name: the file to read.

        Returns:
            The parsed data as returned by ``pandas.read_csv`` ("csv") or the
            matching sheet from ``pandas.read_excel`` ("xls" and "xlsx").

        Raises:
            IOError: if the requested sheet is missing, or if the file format
                is not supported (including "json").
        """
        if self.file_format == "csv":
            logging.debug(f"parsing with pandas.read_csv: {name}")
            logging.debug(
                f"{self.sep=}, {self.skiprows=}, {self.header=}, {self.encoding=}, {self.decimal=}"
            )
            return pd.read_csv(
                name,
                sep=self.sep,
                skiprows=self.skiprows,
                header=self.header,
                encoding=self.encoding,
                decimal=self.decimal,
                thousands=self.thousands,
            )

        if self.file_format in self._excel_engines:
            return self._read_excel_sheet(name, self._excel_engines[self.file_format])

        raise IOError(f"Could not read {name}, {self.file_format} not supported yet")


def check_loader_from_outside_with_get(file_number=3):
    """Manually check the custom loader by loading a test file with cellpy.get.

    Developer helper: the paths are hard-coded to a local checkout under
    ``C:/scripting`` (win32) or ``/Users/jepe/scripting`` (other platforms).
    Prints progress information and returns early, without loading anything,
    if ``file_number`` is unknown or the data file does not exist.

    Args:
        file_number: which test case to run (1: csv, 2: xlsx, 3: xls).
    """
    import pathlib

    pd.options.display.max_columns = 100

    if sys.platform == "win32":
        base_path = pathlib.Path("C:/scripting")
        out = pathlib.Path(r"C:\scripting\trash")
    else:
        base_path = pathlib.Path("/Users/jepe/scripting")
        out = pathlib.Path("/Users/jepe/tmp")

    instrument = "custom"

    # file_number -> (data file name, instrument file relative to base_path)
    test_cases = {
        1: ("custom_data_001.csv", "cellpy/testdata/data/custom_instrument_001.yml"),
        2: ("custom_data_002.xlsx", "cellpy/testdata/instruments/custom_002.yml"),
        3: ("custom_data_003.xls", "cellpy/testdata/instruments/custom_003.yml"),
    }
    if file_number not in test_cases:
        print("not implemented")
        return
    filename, instrument_file = test_cases[file_number]

    # NEXT: test hooks and make tests

    instrument_file = base_path / instrument_file
    data_dir = base_path / "cellpy/testdata/data"
    name = data_dir / filename

    print(f"File exists? {name.is_file()}")
    if not name.is_file():
        print(f"could not find {name} ")
        return

    print(" RUNNING CELLPY GET ".center(80, "="))
    print(f"{instrument=}")

    # Imported here so the early-return paths above do not need cellpy.
    import cellpy

    c = cellpy.get(
        filename=name,
        instrument=instrument,
        instrument_file=instrument_file,
        mass=1.0,
        auto_summary=False,
    )
    _process_cellpy_object(name, c, out)


def _process_cellpy_object(name, c, out):
    """Inspect, export, plot and save a loaded cellpy object.

    Args:
        name: path of the loaded file; ``name.name`` is used as figure title.
        c: the loaded cellpy object.
        out: directory (path object) where the csv files and the saved
            cellpy file are written.
    """
    import matplotlib.pyplot as plt

    pd.options.display.max_columns = 100

    print(f"loaded the file - now lets see what we got")
    raw = c.data.raw
    raw.to_clipboard()
    print(raw.head())
    c.make_step_table()

    # Dump the three tables as semicolon-separated csv files.
    tables = (
        ("raw", raw),
        ("steps", c.data.steps),
        ("summary", c.data.summary),
    )
    for label, frame in tables:
        frame.to_csv(out / f"{label}.csv", sep=";")

    # One panel per quantity, all plotted against test_time on a shared x-axis.
    figure, axes = plt.subplots(
        4,
        1,
        figsize=(6, 10),
        constrained_layout=True,
        sharex=True,
    )
    panels = (
        "voltage",
        "current",
        ["charge_capacity", "discharge_capacity"],
        "cycle_index",
    )
    for axis, columns in zip(axes, panels):
        raw.plot(x="test_time", y=columns, ax=axis)
    figure.suptitle(f"{name.name}", fontsize=16)

    number_of_cycles = c.get_number_of_cycles()
    print(f"Number of cycles: {number_of_cycles}")

    plt.legend()
    plt.show()

    c.save(out / "test_out")


# Run the manual loader check when this file is executed as a script.
if __name__ == "__main__":
    check_loader_from_outside_with_get()