import logging
from dataclasses import dataclass
from typing import Tuple, Dict, List
import pandas as pd
import numpy as np
from cellpy.parameters.internal_settings import BaseHeaders, CELLPY_FILE_VERSION
@dataclass
@dataclass
@dataclass
@dataclass
@dataclass
# pre_aux: str = "aux_"
@dataclass
# pre_aux: str = "aux_"
@dataclass
@dataclass
@dataclass
@dataclass
[docs]
def rename_step_columns(
    steps: pd.DataFrame,
    old_version: int,
    new_version: int = CELLPY_FILE_VERSION,
    **kwargs,
) -> pd.DataFrame:
    """Rename the step-table headers from an old format to a new format.

    Args:
        steps: step table with headers in the old format.
        old_version: key used to look up the old header definitions.
        new_version: key used to look up the new header definitions
            (defaults to ``CELLPY_FILE_VERSION``).
        **kwargs: forwarded unchanged to ``rename_columns``.

    Returns:
        Whatever ``rename_columns`` returns for the selected header definitions.
    """
    logging.debug("renaming headers")
    # NOTE(review): the header definitions are fetched from
    # ``summary_header_versions`` even though this function handles the step
    # table - confirm whether a step-specific lookup table was intended.
    headers_old, headers_new = (
        summary_header_versions.get(version)
        for version in (old_version, new_version)
    )
    return rename_columns(steps, headers_old, headers_new, **kwargs)
[docs]
def rename_raw_columns(
    raw: pd.DataFrame,
    old_version: int,
    new_version: int = CELLPY_FILE_VERSION,
    **kwargs,
) -> pd.DataFrame:
    """Rename the raw-data headers from an old format to a new format.

    Args:
        raw: raw dataframe with headers in the old format.
        old_version: key used to look up the old header definitions in
            ``raw_header_versions``.
        new_version: key used to look up the new header definitions in
            ``raw_header_versions`` (defaults to ``CELLPY_FILE_VERSION``).
        **kwargs: forwarded unchanged to ``rename_columns``.

    Returns:
        Whatever ``rename_columns`` returns for the selected header definitions.
    """
    logging.debug("renaming headers")
    headers_old, headers_new = (
        raw_header_versions.get(version)
        for version in (old_version, new_version)
    )
    return rename_columns(raw, headers_old, headers_new, **kwargs)
[docs]
def rename_summary_columns(
    summary: pd.DataFrame,
    old_version: int,
    new_version: int = CELLPY_FILE_VERSION,
    **kwargs,
) -> pd.DataFrame:
    """Rename the summary headers to a new format.

    Args:
        summary: summary dataframe in the old format.
        old_version: old format (cellpy_file_format; the summary format
            number might be used instead in the future).
        new_version: new format (cellpy_file_format; the summary format
            number might be used instead in the future).
        **kwargs: forwarded unchanged to ``rename_columns``:
            remove_missing_in_new (bool): remove the columns that are not
                defined in the new format.
            populate_missing_in_old (bool): add "new-format" missing
                columns (filled with NaN).

    Returns:
        summary (pandas.DataFrame) with column headers in the new format.
    """
    logging.debug("renaming headers")
    headers_old, headers_new = (
        summary_header_versions.get(version)
        for version in (old_version, new_version)
    )
    return rename_columns(summary, headers_old, headers_new, **kwargs)
[docs]
def rename_fid_columns(
    fid_table: pd.DataFrame,
    old_version: int,
    new_version: int = CELLPY_FILE_VERSION,
    **kwargs,
) -> pd.DataFrame:
    """Placeholder for renaming the fid-table headers (not implemented).

    Logs a critical message saying that renaming is not implemented and hands
    the table back as received.

    Args:
        fid_table: the fid table.
        old_version: currently unused.
        new_version: currently unused.
        **kwargs: currently unused.

    Returns:
        The same ``fid_table`` object that was passed in.
    """
    not_implemented_msg = (
        "RENAMING NOT IMPLEMENTED YET -> Please, create an issue on Github"
    )
    logging.debug("renaming headers")
    logging.critical(not_implemented_msg)
    return fid_table
[docs]
def get_column_name_mapper(
    old_columns: BaseHeaders, new_columns: BaseHeaders
) -> Tuple[Dict[str, str], List[str], List[str]]:
    """Create a dictionary that maps old column names to new column names.

    A key of the new format counts as present in the old format only when
    ``old_columns.get(key)`` returns a truthy value.

    Args:
        old_columns: The BaseHeaders for the old format.
        new_columns: The BaseHeaders for the new format.

    Returns:
        Translation dictionary (old column name -> new column name),
        list of keys missing in the new format,
        list of keys missing in the old format.
    """
    translations = {}
    missing_in_old = []
    matched_keys = set()

    for key in new_columns.keys():
        if old_column := old_columns.get(key):
            translations[old_column] = new_columns.get(key)
            matched_keys.add(key)
        else:
            missing_in_old.append(key)

    # Build a fresh list instead of calling ``.remove()`` on whatever
    # ``old_columns.keys()`` handed back: that would mutate the returned
    # object in place (a problem if the header object keeps a reference to
    # it) and does not work at all for view-like key collections.
    missing_in_new = [key for key in old_columns.keys() if key not in matched_keys]
    return translations, missing_in_new, missing_in_old
[docs]
def rename_columns(
    df: pd.DataFrame,
    old: BaseHeaders,
    new: BaseHeaders,
    remove_missing_in_new: bool = False,
    populate_missing_in_old: bool = True,
) -> pd.DataFrame:
    """Rename the column headers of a cells dataframe.

    Usage:
        >>> old_format_headers = HeadersSummaryV6()
        >>> new_format_headers = HeadersSummaryV7()
        >>> df_new_format = rename_columns(df_old_format, old_format_headers, new_format_headers)

    Args:
        df: The dataframe.
        old: The BaseHeaders for the old format.
        new: The BaseHeaders for the new format.
        remove_missing_in_new: remove the columns that are not defined in the new format.
        populate_missing_in_old: add "new-format" missing columns (filled with NaN).

    Returns:
        A new dataframe with updated columns; the dataframe passed in is not modified.
    """
    col_name_mapper, missing_in_new, missing_in_old = get_column_name_mapper(old, new)

    if remove_missing_in_new:
        # The mapper reports header *keys*, while the frame is labelled with
        # the old-format column names, so translate before dropping.
        # Columns that are not present in the frame are skipped.
        obsolete = [name for key in missing_in_new if (name := old.get(key))]
        df = df.drop(columns=obsolete, errors="ignore")

    # ``rename`` hands back a new frame, so the columns added below never end
    # up in the caller's dataframe.
    df = df.rename(columns=col_name_mapper)

    if populate_missing_in_old:
        for key in missing_in_old:
            # Label the added column with its new-format name (fall back to
            # the key itself if the new format has no name for it).
            name = new.get(key) or key
            if name not in df.columns:  # never overwrite existing data
                # ``np.NAN`` was removed in NumPy 2.0; ``np.nan`` works everywhere.
                df[name] = np.nan
    return df
def _create_dummy_summary(columns):
df = pd.DataFrame(
data=np.random.rand(5, len(columns) - 1), index=range(1, 6), columns=columns[1:]
)
df.index.name = columns[0]
return df
# Use this as inspiration if you want to implement translation of step table headers:
# HEADERS_KEYS_STEP_TABLE_EXTENDED = [
# "point",
# "test_time",
# "step_time",
# "current",
# "voltage",
# "charge",
# "discharge",
# "internal_resistance",
# ]
# HEADERS_STEP_TABLE_EXTENSIONS = ["min", "max", "avr", "first", "last", "delta", "std"]
#
#
# headers_step_table_v5["test"] = "test"
# headers_step_table_v5["ustep"] = "ustep"
# headers_step_table_v5["cycle"] = "cycle"
# headers_step_table_v5["step"] = "step"
# headers_step_table_v5["test_time"] = "test_time"
# headers_step_table_v5["step_time"] = "step_time"
# headers_step_table_v5["sub_step"] = "sub_step"
# headers_step_table_v5["type"] = "type"
# headers_step_table_v5["sub_type"] = "sub_type"
# headers_step_table_v5["info"] = "info"
# headers_step_table_v5["voltage"] = "voltage"
# headers_step_table_v5["current"] = "current"
# headers_step_table_v5["charge"] = "charge"
# headers_step_table_v5["discharge"] = "discharge"
# headers_step_table_v5["point"] = "point"
# headers_step_table_v5["internal_resistance"] = "ir"
# headers_step_table_v5["internal_resistance_change"] = "ir_pct_change"
# headers_step_table_v5["rate_avr"] = "rate_avr"
# def translate_headers(data_sets, cellpy_file_version):
# # this works for upgrading to version 6;
# # note that the extensions for the step table are hard-coded
# logging.debug(f"translate headers from v{cellpy_file_version}")
#
# summary_rename_dict = {
# headers_summary_v5[key]: HEADERS_SUMMARY[key]
# for key in HEADERS_SUMMARY
# if key in headers_summary_v5
# }
#
# steps_rename_dict = {
# headers_step_table_v5[key]: HEADERS_STEP_TABLE[key]
# for key in HEADERS_STEP_TABLE
# if key in headers_step_table_v5
# }
#
# steps_rename_dict_extensions = {}
# for key in HEADERS_KEYS_STEP_TABLE_EXTENDED:
# for extension in HEADERS_STEP_TABLE_EXTENSIONS:
# old = "_".join([HEADERS_STEP_TABLE[key], extension])
# new = "_".join([headers_step_table_v5[key], extension])
# steps_rename_dict_extensions[old] = new
#
# raw_rename_dict = {
# headers_normal_v5[key]: HEADERS_NORMAL[key] for key in HEADERS_NORMAL
# }
#
# summary_index_name = HEADERS_SUMMARY["cycle_index"]
# raw_index_name = HEADERS_NORMAL["data_point_txt"]
#
# # from pprint import pprint
# # pprint(summary_rename_dict)
# # pprint(steps_rename_dict)
# # pprint(steps_rename_dict_extensions)
# # pprint(raw_rename_dict)
#
# new_data_sets = []
# for data_set in data_sets:
# data_set.summary.rename(columns=summary_rename_dict, inplace=True)
# data_set.raw.rename(columns=raw_rename_dict, inplace=True)
# data_set.steps.rename(columns=steps_rename_dict, inplace=True)
# data_set.steps.rename(columns=steps_rename_dict_extensions, inplace=True)
#
# # we also need to update the index-name
# data_set.summary.index.name = summary_index_name
# data_set.raw.index.name = raw_index_name
#
# new_data_sets.append(data_set)
#
# # pprint(data_set.summary.columns)
# # pprint(data_set.steps.columns)
# # pprint(data_set.raw.columns)
# # check(new_data_sets)
# return new_data_sets
def _check():
    """Manual smoke test: rename a dummy V6 summary to V7 headers and print it."""
    old_headers = HeadersSummaryV6()
    new_headers = HeadersSummaryV7()
    dummy = _create_dummy_summary(columns=old_headers.keys())
    renamed = rename_columns(
        dummy,
        old_headers,
        new_headers,
        remove_missing_in_new=False,
        populate_missing_in_old=True,
    )
    print(renamed.head())


# Run the smoke test only when the module is executed as a script.
if __name__ == "__main__":
    _check()