Source code for cellpy.utils.batch_tools.batch_helpers

import csv
import itertools
import logging
import os
import warnings

import pandas as pd

import cellpy.parameters.internal_settings
from cellpy import filefinder, prms
from cellpy.readers import core
from cellpy.exceptions import ExportFailed, NullData, WrongFileVersion

# logger = logging.getLogger(__name__)
from cellpy.parameters.internal_settings import headers_step_table

# Header lookups used throughout this module; both are fetched once, at import time.
# hdr_summary is indexed by summary column id, hdr_journal by journal column id.
hdr_summary = cellpy.parameters.internal_settings.get_headers_summary()
hdr_journal = cellpy.parameters.internal_settings.get_headers_journal()


# Suffixes that _remove_date_and_celltype strips from the end of a label.
CELL_TYPE_IDS = ["cc", "ec", "eth"]


def look_up_and_get(cellpy_file_name, table_name, root=None, max_cycle=None):
    """Extract a table from a cellpy hdf5-file.

    Args:
        cellpy_file_name: path to the cellpy hdf5-file.
        table_name: name of the table (node) located below ``root``.
        root: parent node of the table (defaults to "/CellpyData").
        max_cycle: upper cycle limit. Only considered when ``table_name`` is
            the step table (``prms._cellpyfile_step``); not implemented
            properly yet.

    Returns:
        The object returned by ``pandas.HDFStore.select`` for the table.

    Raises:
        WrongFileVersion: if reading the table raises a ``KeyError``.
    """
    if root is None:
        root = "/CellpyData"
    table_path = "/".join([root, table_name])

    logging.debug(f"look_up_and_get({cellpy_file_name}, {table_name}")
    store = pd.HDFStore(cellpy_file_name)
    # max_cycle is not implemented properly yet
    # TODO: implement max_cycle
    try:
        if max_cycle and table_name == prms._cellpyfile_step:
            # NOTE(review): the where-string below mentions the local
            # ``_cycle_header`` by name - keep the two in sync when renaming.
            _cycle_header = headers_step_table.cycle
            cycles = store.select(table_path, where="columns=[_cycle_header]")
            _where = cycles[_cycle_header] <= max_cycle
            table = store.select(table_path, where=_where)
        else:
            table = store.select(table_path)
    except KeyError as e:
        logging.warning("Could not read the table")
        raise WrongFileVersion(e) from e
    finally:
        # Always release the file handle, also when an unexpected exception
        # (anything but KeyError) escapes from the select calls.
        store.close()
    return table


def create_folder_structure(project_name, batch_name):
    """Create the folder structure for a batch project.

    The structure consists of a main working folder ``project_name`` located
    in the ``outdatadir`` (``prms.Paths.outdatadir``) with a sub-folder named
    ``batch_name``, which in turn gets a ``raw_data`` sub-folder. Folders that
    do not exist yet are created. The function also returns the name of the
    info-df file (json), which is placed in the project folder.

    Args:
        project_name: name of the project
        batch_name: name of the batch

    Returns: (info_file, (project_dir, batch_dir, raw_dir))

    """
    project_dir = os.path.join(prms.Paths.outdatadir, project_name)
    batch_dir = os.path.join(project_dir, batch_name)
    raw_dir = os.path.join(batch_dir, "raw_data")

    # parents come first, so every os.mkdir only has to add a single level
    for folder in (project_dir, batch_dir, raw_dir):
        if not os.path.isdir(folder):
            os.mkdir(folder)

    # file-name for the info_df (json)
    info_file = os.path.join(project_dir, "cellpy_batch_%s.json" % batch_name)
    return info_file, (project_dir, batch_dir, raw_dir)


def create_factory():
    """Return an ``InstrumentFactory`` with all found instruments registered.

    Every (id, instrument) pair reported by ``core.find_all_instruments`` is
    registered as a builder on a fresh ``core.InstrumentFactory``.
    """
    factory = core.InstrumentFactory()
    for name, builder in core.find_all_instruments().items():
        factory.register_builder(name, builder)
    return factory


def find_files(info_dict, file_list=None, pre_path=None, sub_folders=None, **kwargs):
    """Find files using cellpy.filefinder.

    For every run-name in the journal pages the raw-files and the cellpy-file
    name are looked up and appended to the ``raw_file_names`` and
    ``cellpy_file_name`` entries of ``info_dict`` (which is modified in place).

    Note that ``prms.FileNames.raw_extension`` is overwritten whenever the
    instrument of a run provides a raw-file extension.

    Args:
        info_dict: journal pages.
        file_list: list of files names to search through.
        pre_path: path to prepend found files from file_list (if file_list is given).
        sub_folders (bool): perform search also in sub-folders. If None
            (default), ``prms.FileNames.sub_folders`` is used.

    **kwargs (filefinder.search_for_files):
        run_name(str): run-file identification.
        raw_extension(str): optional, extension of run-files (without the '.').
        cellpy_file_extension(str): optional, extension for cellpy files
            (without the '.').
        raw_file_dir(path): optional, directory where to look for run-files
            (default: read prm-file)
        cellpy_file_dir(path): optional, directory where to look for
            cellpy-files (default: read prm-file)
        prm_filename(path): optional parameter file can be given.
        file_name_format(str): format of raw-file names or a glob pattern
            (default: YYYYMMDD_[name]EEE_CC_TT_RR).
        reg_exp(str): use regular expression instead (defaults to None).
        file_list (list of str): perform the search within a given list
            of filenames instead of searching the folder(s). The list should
            not contain the full filepath (only the actual file names). If
            you want to provide the full path, you will have to modify the
            file_name_format or reg_exp accordingly.
        pre_path (path or str): path to prepend the list of files selected
             from the file_list.

    Returns:
        info_dict
    """
    # Only fall back to the configured default when the caller did not decide;
    # an explicit ``sub_folders=False`` must not be replaced by the default.
    if sub_folders is None:
        sub_folders = prms.FileNames.sub_folders
    instrument_factory = create_factory()
    # searches for the raw data files and the cellpyfile-name
    # TODO: implement faster file searching
    # TODO: implement option for not searching for raw-file names if force_cellpy is True
    for i, run_name in enumerate(info_dict[hdr_journal["filename"]]):
        try:
            instrument = info_dict[hdr_journal["instrument"]][i]
            raw_ext = instrument_factory.query(instrument, "raw_ext")
            if raw_ext:
                prms.FileNames.raw_extension = raw_ext
        except IndexError:
            warnings.warn(f"no instrument given for {run_name}")

        logging.debug(f"checking for {run_name}")
        raw_files, cellpyfile = filefinder.search_for_files(
            run_name,
            file_list=file_list,
            pre_path=pre_path,
            sub_folders=sub_folders,
            **kwargs,
        )
        # an empty search result is stored as None
        info_dict[hdr_journal["raw_file_names"]].append(raw_files or None)
        info_dict[hdr_journal["cellpy_file_name"]].append(cellpyfile)

    return info_dict


def fix_groups(groups):
    """Takes care of strange group numbers.

    The group labels are replaced by consecutive numbers starting at 1.
    The new numbers follow the sorted order of the original labels, so the
    result is deterministic (it used to depend on the iteration order of a
    ``set``). Labels that cannot be sorted (mixed types) are numbered in
    order of first appearance instead.

    Args:
        groups: iterable of hashable group labels.

    Returns:
        list with the new group number for each item in ``groups``.
    """
    unique_groups = set(groups)
    try:
        ordered_groups = sorted(unique_groups)
    except TypeError:
        # un-orderable mix of labels: keep the order they were given in
        ordered_groups = list(dict.fromkeys(groups))
    lookup = {g: number for number, g in enumerate(ordered_groups, start=1)}
    return [lookup[g] for g in groups]


def save_multi(data, file_name, sep=";"):
    """Store data column-wise in a csv-file.

    Each item in ``data`` becomes one column; shorter columns are padded
    (``itertools.zip_longest``) so that all rows get the same length.

    Args:
        data: iterable of columns (each an iterable of values).
        file_name: name of the csv-file to write.
        sep: delimiter used in the csv-file.

    Raises:
        ExportFailed: if writing the rows fails for any reason.
    """
    logging.debug("saving multi")
    with open(file_name, "w", newline="") as csv_file:
        logging.debug(f"{file_name} opened")
        csv_writer = csv.writer(csv_file, delimiter=sep)
        try:
            # transpose: columns in, rows out
            csv_writer.writerows(itertools.zip_longest(*data))
        except Exception as e:
            logging.info(f"Exception encountered in batch._save_multi: {e}")
            raise ExportFailed
        else:
            logging.info(f"{file_name} OK")
        logging.debug("wrote rows using itertools in _save_multi")


def make_unique_groups(info_df):
    """This function cleans up the group numbers a bit.

    The groups are renumbered to 1..n (following the sorted order of the
    original group numbers), and the rows within each group get a running
    sub-group number starting at 1 (in row order).

    Args:
        info_df: frame with a group column (``hdr_journal.group``). It is
            modified in place; the sub-group column
            (``hdr_journal.sub_group``) is (re-)created.

    Returns:
        the modified ``info_df``.
    """
    group_col = hdr_journal.group
    sub_group_col = hdr_journal.sub_group

    # Select members from a snapshot of the original group numbers. Selecting
    # on the column that is being renumbered would merge groups whenever a new
    # number equals a not-yet-processed old one (e.g. groups 0 and 1).
    original_groups = info_df[group_col].copy()
    info_df[sub_group_col] = original_groups * 0
    for new_number, old_number in enumerate(sorted(original_groups.unique()), start=1):
        members = (original_groups == old_number).to_numpy()
        if not members.any():
            # nothing compares equal to this value (e.g. NaN)
            continue
        member_count = int(members.sum())
        info_df.loc[members, sub_group_col] = list(range(1, member_count + 1))
        info_df.loc[members, group_col] = new_number
    return info_df


def _remove_date_and_celltype(
    label,
):
    """Strip the first and (if it is a cell type id) the last part of a label.

    The label is split on "_". The first part is always dropped, and the last
    remaining part is dropped as well if it is listed in ``CELL_TYPE_IDS``.

    Args:
        label (str): the label (run-name) to clean up.

    Returns:
        str: the remaining parts joined by "_" (an empty string if nothing
        remains, e.g. for a label without any "_").
    """
    parts = label.split("_")[1:]
    # ``parts`` is empty for labels without "_" - nothing more to strip then
    if parts and parts[-1] in CELL_TYPE_IDS:
        parts.pop()
    return "_".join(parts)


def create_labels(label, *args):
    """Return a re-formatted label.

    Currently this only passes the label through
    ``_remove_date_and_celltype``; additional positional arguments are
    accepted but ignored.
    """
    reformatted = _remove_date_and_celltype(label)
    return reformatted


def create_selected_summaries_dict(summaries_list):
    """Creates a dictionary with summary column headers.

    Examples:
        >>> summaries_to_output = ["discharge_capacity_gravimetric", "charge_capacity_gravimetric"]
        >>> summaries_to_output_dict = create_selected_summaries_dict(
        ...    summaries_to_output
        ... )
        >>> # summaries_to_output_dict now maps each id to ``hdr_summary[id]``

    Args:
        summaries_list: list containing cellpy summary column id names

    Returns: dictionary of the form {cellpy id name: cellpy summary
        header name,}

    """
    return {column_id: hdr_summary[column_id] for column_id in summaries_list}


def pick_summary_data(key, summary_df, selected_summaries):
    """Pick the columns of ``summary_df`` that belong to the summary ``key``.

    Args:
        key: cellpy summary column id (must be one of ``selected_summaries``).
        summary_df: frame whose columns have (at least) two levels; the
            second level is compared to the summary header.
        selected_summaries: list of cellpy summary column ids.

    Returns:
        the columns of ``summary_df`` whose second column level equals the
        header looked up for ``key``.
    """
    header = create_selected_summaries_dict(selected_summaries)[key]
    second_level = summary_df.columns.get_level_values(1)
    return summary_df.iloc[:, second_level == header]


def join_summaries(summary_frames, selected_summaries, keep_old_header=False):
    """Parse the summaries and combine based on column (selected_summaries).

    All frames are concatenated side by side (keyed by their name in
    ``summary_frames``), and one frame is then picked out per selected
    summary.

    Args:
        summary_frames: mapping from name (test-name) to summary frame.
        selected_summaries: list of cellpy summary column ids.
        keep_old_header: if False, the last column level is dropped from
            each of the returned frames.

    Returns:
        list of frames, one for each selected summary (each frame gets a
        ``name`` attribute set to the summary column id).

    Raises:
        NullData: if ``summary_frames`` is empty.
    """
    if not summary_frames:
        raise NullData("No summaries available to join")
    header_lookup = create_selected_summaries_dict(selected_summaries)

    test_names = list(summary_frames)
    frames = [summary_frames[test_name] for test_name in test_names]
    for frame in frames:
        if frame.empty:
            logging.debug("Empty summary_frame encountered")

    summary_df = pd.concat(frames, keys=test_names, axis=1, sort=True)
    second_level = summary_df.columns.get_level_values(1)

    out = []
    for key, header in header_lookup.items():
        selected = summary_df.iloc[:, second_level == header]
        selected.name = key

        if not keep_old_header:
            try:
                selected.columns = selected.columns.droplevel(-1)
            except AttributeError as e:
                logging.debug("could not drop level from frame")
                logging.debug(e)

        out.append(selected)

    logging.debug("finished joining summaries")
    return out


def generate_folder_names(name, project):
    """Creates sensible folder names.

    Args:
        name: name of the batch.
        project: name of the project.

    Returns:
        (out_data_dir, project_dir, batch_dir, raw_dir) where each folder is
        nested inside the previous one, starting at ``prms.Paths.outdatadir``.
        No folders are created.
    """
    out_data_dir = prms.Paths.outdatadir
    project_dir = os.path.join(out_data_dir, project)
    batch_dir = os.path.join(project_dir, name)
    return (
        out_data_dir,
        project_dir,
        batch_dir,
        os.path.join(batch_dir, "raw_data"),
    )


def _extract_dqdv(cell_data, extract_func, last_cycle):
    """Simple wrapper around the cellpy.utils.ica.dqdv function.

    Args:
        cell_data: object providing ``get_cycle_numbers()``.
        extract_func: callable ``f(cycle, return_dataframe=False)`` returning
            the two arrays that are handed on (in swapped order) to ``dqdv``.
        last_cycle: only process cycles up to this number (if not None).

    Returns:
        list of columns: for each cycle a voltage column followed by a dQ
        column, each starting with a header string. Cycles that raise
        ``NullData`` contribute columns containing only the header.
    """
    from cellpy.utils.ica import dqdv

    cycles = cell_data.get_cycle_numbers()
    if last_cycle is not None:
        cycles = [number for number in cycles if number <= int(last_cycle)]
        logging.debug(f"only processing up to cycle {last_cycle}")
        logging.debug(f"you have {len(cycles)} cycles to process")

    columns = []
    for cycle in cycles:
        try:
            capacity, voltage = extract_func(cycle, return_dataframe=False)
            voltage, dq = dqdv(voltage, capacity)
            voltage_column = voltage.tolist()
            dq_column = dq.tolist()
        except NullData as e:
            voltage_column = []
            dq_column = []
            logging.info(" Ups! Could not process this (cycle %i)" % cycle)
            logging.info(" %s" % e)

        # the header goes in front of the values in each column
        dq_column.insert(0, "dQ cycle_no %i" % cycle)
        voltage_column.insert(0, "voltage cycle_no %i" % cycle)

        columns.append(voltage_column)
        columns.append(dq_column)
    return columns


def export_dqdv(cell_data, savedir, sep, last_cycle=None):
    """Exports dQ/dV data from a CellpyCell instance.

    Two csv-files are written, one for charge (``*_dqdv_charge.csv``) and one
    for discharge (``*_dqdv_discharge.csv``). The file names are based on
    ``cell_data.data.loaded_from`` (without its extension). A failed export
    is logged and reported as a warning instead of being raised.

    Args:
        cell_data: CellpyCell instance
        savedir: path to the folder where the files should be saved
        sep: separator for the .csv-files.
        last_cycle: only export up to this cycle (if not None)
    """
    logging.debug("exporting dqdv")
    filename = cell_data.data.loaded_from
    stem, _ = os.path.splitext(filename)
    if savedir:
        stem = os.path.join(savedir, os.path.basename(stem))
        logging.debug(f"savedir is true: {stem}")

    # (direction, extractor, output file) - charge is handled first
    jobs = (
        ("charge", cell_data.get_ccap, stem + "_dqdv_charge.csv"),
        ("discharge", cell_data.get_dcap, stem + "_dqdv_discharge.csv"),
    )

    number_of_cycles = len(cell_data.get_cycle_numbers())
    logging.debug("%s: you have %i cycles" % (filename, number_of_cycles))

    for direction, extract_func, outname in jobs:
        out_data = _extract_dqdv(cell_data, extract_func, last_cycle)
        logging.debug(f"extracted ica for {direction}")
        try:
            save_multi(data=out_data, file_name=outname, sep=sep)
        except ExportFailed as e:
            logging.info(f"could not export ica for {direction}")
            warnings.warn(f"ExportFailed exception raised: {e}")
        else:
            logging.debug(f"saved ica for {direction}")