"""pec csv-type data files"""
import logging
import os
import warnings
from datetime import datetime
import numpy as np
import pandas as pd
from dateutil.parser import parse
from cellpy.parameters.internal_settings import get_headers_normal
from cellpy.readers.core import Data, FileID, humanize_bytes
from cellpy.readers.instruments.base import BaseLoader
# Maps cellpy header keys to the (cleaned) column names found in PEC csv files.
# The time, voltage and current entries are only defaults: DataLoader rewrites
# them at load time once the units used in the actual file are known.
pec_headers_normal = {
    "step_index_txt": "Step",
    "cycle_index_txt": "Cycle",
    "test_time_txt": "Total_Time_Seconds",  # default, may be replaced
    "step_time_txt": "Step_Time_Seconds",  # default, may be replaced
    "datetime_txt": "Real_Time",
    "voltage_txt": "Voltage_mV",  # default, may be replaced
    "current_txt": "Current_mA",  # default, may be replaced
    "charge_capacity_txt": "Charge_Capacity_mAh",
    "discharge_capacity_txt": "Discharge_Capacity_mAh",
    "charge_energy_txt": "Charge_Capacity_mWh",
    "discharge_energy_txt": "Discharge_Capacity_mWh",
    "internal_resistance_txt": "Internal_Resistance_1_mOhm",
    "test_id_txt": "Test",
}
# TODO: better reading of first part of the file (comments and headers)
# 1. find the units
# 2. convert cycle and step numbers so that they start with 1 and not 0
# 3. find user-defined variables
class DataLoader(BaseLoader):
    """Loader for csv-type data files exported from PEC testers.

    The file is expected to start with a header of variable length (key/value
    lines and sections fenced by lines starting with ``#``) followed by a
    regular csv table. The table is read into a pandas DataFrame, its columns
    are renamed to the cellpy headers and the values are rescaled.

    NOTE(review): ``self.temp_file_path``, ``self.name``, ``self.fid`` and
    ``self.generate_fid()`` are not defined in this class; they are presumably
    provided by ``BaseLoader`` - confirm.
    """

    instrument_name = "pec_csv"
    raw_ext = "csv"

    def __init__(self, *args, **kwargs):
        """Set up the default parsing options.

        Positional and keyword arguments are accepted but not used here.
        """
        # should consider moving this to the Loader class
        self.headers_normal = get_headers_normal()

        self.current_chunk = 0  # use this to set chunks to load
        self.pec_data = None
        self.pec_log = None
        self.pec_settings = None

        # The unit prefix of these columns is read from the file.
        self.variable_header_keywords = ["Voltage (V)", "Current (A)"]

        # Lines fencing the RESULTS CHECK section; delimiters inside that
        # section are ignored when searching for the start of the data table.
        self.fake_header_length = ["#RESULTS CHECK\n", "#END RESULTS CHECK\n"]
        self.pec_file_delimiter = ","
        self.number_of_header_lines = None  # not constant, found per file

        # should consider moving this to the Loader class
        self.cellpy_headers = get_headers_normal()

    def _get_pec_units(self):
        """Find the unit-fractions used in the raw file.

        Charge, mass and energy are fixed to 0.001. The prefix of the voltage
        and current units is read from the column titles of the data table
        (for example ``Voltage (mV)`` gives 0.001).

        Side effect: the ``voltage_txt`` and ``current_txt`` entries of the
        module-level ``pec_headers_normal`` are updated so that they contain
        the prefix found in the file.

        Returns:
            dict: unit-fractions for "charge", "mass" and "energy", plus
            "voltage" and "current" when the corresponding column is found.
            An unrecognised prefix gives the value ``None``.
        """
        # Mapping prefixes to values
        prefix = {"µ": 10**-6, "m": 10**-3, "": 1}

        # The non-variable units (Ah, g, Wh)
        pec_units = {"charge": 0.001, "mass": 0.001, "energy": 0.001}

        data = pd.read_csv(
            self.temp_file_path, skiprows=self.number_of_header_lines, nrows=1
        )

        for item in data.keys():
            for position, keyword in enumerate(self.variable_header_keywords):
                # Negative offset of "(" counted from the end of the keyword,
                # e.g. -3 for "Voltage (V)".
                offset = keyword.find("(") - len(keyword)
                # Quantity name including the opening parenthesis, e.g.
                # "Voltage (".
                stem = keyword[: offset + 1]
                if stem not in item:
                    continue
                # The character at that offset in the column title is either
                # the prefix ("m" in "Voltage (mV)") or "(" when there is no
                # prefix, which is mapped to the empty string.
                found = item[offset].replace("(", "")
                if position == 0:
                    pec_units["voltage"] = prefix.get(found)
                    pec_headers_normal["voltage_txt"] = f"Voltage_{found}V"
                elif position == 1:
                    pec_units["current"] = prefix.get(found)
                    pec_headers_normal["current_txt"] = f"Current_{found}A"
        return pec_units

    def _get_pec_times(self):
        """Find how the time columns of the raw file are expressed.

        Side effect: the ``test_time_txt`` and ``step_time_txt`` entries of
        the module-level ``pec_headers_normal`` are updated with the unit
        found in the file.

        Returns:
            dict: maps the lower-cased, underscore-separated column name (for
            example "total_time") to either a numeric factor that converts the
            column to seconds or, for the hh:mm:ss.xxx format, to the callable
            ``timestamp_to_seconds``.
        """
        logging.debug("retrieve pec units")

        # Mapping units to their conversion values
        units = {
            "(Hours in hh:mm:ss.xxx)": self.timestamp_to_seconds,
            "(Decimal Hours)": 3600,
            "(Minutes)": 60,
            "(Seconds)": 1,
        }
        # Only the column titles are needed, so no data rows are read.
        data = pd.read_csv(
            self.temp_file_path, skiprows=self.number_of_header_lines, nrows=0
        )

        # Header keys that must follow the unit found in the file.
        header_keys = {
            "total_time": ("test_time_txt", "Total_Time"),
            "step_time": ("step_time_txt", "Step_Time"),
        }

        pec_times = dict()
        for item in data.keys():
            for unit in units:
                if unit not in item:
                    continue
                paren = item.find("(")
                # Text in front of " (" is the variable name.
                var = item[: paren - 1].lower().replace(" ", "_")
                its_unit = item[paren:]
                pec_times[var] = units.get(its_unit)
                if var in header_keys:
                    key, label = header_keys[var]
                    unit_label = its_unit[1:-1].replace(" ", "_")
                    pec_headers_normal[key] = f"{label}_{unit_label}"
        return pec_times

    @staticmethod
    def get_raw_units():
        """Include the settings for the units used by the instrument.

        Returns:
            dict: the unit labels (strings) for current, charge, mass,
            voltage, energy and time.
        """
        raw_units = dict()
        raw_units["current"] = "A"
        raw_units["charge"] = "Ah"
        raw_units["mass"] = "mg"
        raw_units["voltage"] = "V"
        raw_units["energy"] = "Wh"
        raw_units["time"] = "s"
        return raw_units

    @staticmethod
    def _raw_units_for_internal_calculations():
        """Return the numeric unit-fractions used when rescaling the raw data.

        Returns:
            dict: 1.0 for current, charge, voltage, energy and time, and
            0.001 for mass.
        """
        raw_units = dict()
        raw_units["current"] = 1.0
        raw_units["charge"] = 1.0
        raw_units["mass"] = 0.001
        raw_units["voltage"] = 1.0
        raw_units["energy"] = 1.0
        raw_units["time"] = 1.0
        return raw_units

    def get_raw_limits(self):
        """Include the settings for how to decide what kind of step you are examining here.

        The raw limits are 'epsilons' used to check if the current and/or
        voltage is stable (for example for galvanostatic steps, one would
        expect that the current is stable (constant) and non-zero). It is
        expected that different instruments (with different resolution etc.)
        have different 'epsilons'.

        A warning is issued on every call since the values are untested.

        Returns:
            dict: the raw limits.
        """
        warnings.warn("raw limits have not been subject for testing yet")
        raw_limits = dict()
        raw_limits["current_hard"] = 0.1  # There is a bug in PEC
        raw_limits["current_soft"] = 1.0
        raw_limits["stable_current_hard"] = 2.0
        raw_limits["stable_current_soft"] = 4.0
        raw_limits["stable_voltage_hard"] = 2.0
        raw_limits["stable_voltage_soft"] = 4.0
        raw_limits["stable_charge_hard"] = 2.0
        raw_limits["stable_charge_soft"] = 5.0
        raw_limits["ir_change"] = 0.00001
        return raw_limits

    def loader(self, file_name, bad_steps=None, **kwargs):
        """Load a PEC csv file and return it as a cellpy ``Data`` object.

        Args:
            file_name: not used directly in this method; the data is read
                from ``self.temp_file_path`` and ``self.name`` is stored as
                the origin of the data.
            bad_steps: not implemented for this instrument; a warning is
                issued if it is given.
            **kwargs: accepted but not used.

        Returns:
            Data: object holding the renamed and rescaled raw data.
        """
        self.number_of_header_lines = self._find_header_length()

        data = Data()
        self.generate_fid()
        data.raw_data_files.append(self.fid)

        # div parameters and information (probably load this last)
        data.loaded_from = self.name

        # some overall prms
        data.channel_index = None
        data.creator = None
        data.schedule_file_name = None
        data.test_ID = None
        data.test_name = None

        # --------- read raw-data (normal-data) -------------------------
        self._load_pec_data(bad_steps)
        data.start_datetime = self.pec_settings["start_time"]
        length_of_test = self.pec_data.shape[0]
        logging.debug(f"length of test: {length_of_test}")

        logging.debug("renaming columns")
        self._rename_headers()
        self._convert_units()

        # Cycle indices should not be 0. The comparison is done on the values:
        # ``0 in series`` would test the index labels of the Series instead.
        if (self.pec_data["cycle_index"] == 0).any():
            self.pec_data["cycle_index"] += 1

        data.raw = self.pec_data
        data.raw_data_files_length.append(length_of_test)
        return data

    def _load_pec_data(self, bad_steps):
        """Read the data table and the header of the file.

        The table is stored in ``self.pec_data`` (with cleaned column names
        and the extra columns cellpy needs) and the header lines are passed on
        to ``_extract_variables``.

        Args:
            bad_steps: not implemented; only triggers a warning when given.
        """
        if bad_steps is not None:
            warnings.warn("bad_steps is not implemented yet for this instrument")

        file_name = self.temp_file_path
        number_of_header_lines = self.number_of_header_lines

        # ----------------- reading the data ---------------------
        df = pd.read_csv(file_name, skiprows=number_of_header_lines)

        # get rid of unnamed columns
        df = df.loc[:, ~df.columns.str.contains("^Unnamed")]

        # get rid of spaces, parenthesis, and the deg-sign
        new_column_headers = {
            c: c.replace(" ", "_")
            .replace("(", "")
            .replace(")", "")
            .replace("°", "")
            .replace(r"%", "pct")
            for c in df.columns
        }
        df.rename(columns=new_column_headers, inplace=True)

        # add missing columns
        df.insert(0, self.headers_normal.data_point_txt, range(len(df)))
        df[self.headers_normal.sub_step_index_txt] = 0
        df[self.headers_normal.sub_step_time_txt] = 0
        self.pec_data = df

        # ---------------- reading the parameters ---------------
        # Only the lines in front of the data table are collected.
        with open(file_name, "r") as ofile:
            lines = [line for _, line in zip(range(number_of_header_lines), ofile)]
        self._extract_variables(lines)

    def _extract_variables(self, lines):
        """Parse the header lines and store start and end time.

        Every line starting with ``#`` toggles a comment section; lines inside
        such a section are skipped. All other lines are split on "," into a
        variable name and a value. The result is stored in
        ``self.pec_settings`` with the keys "start_time" and "end_time".

        Args:
            lines: the header lines of the file.

        Raises:
            KeyError: if the header has no "Start_Time" or "End_Time" entry.
        """
        header_comments = dict()
        comment_loop = False
        for line in lines:
            if line.startswith("#"):
                comment_loop = not comment_loop
                continue
            if comment_loop:
                continue
            parts = line.split(",")
            variable = parts[0].strip().strip(":").replace(" ", "_")
            value = parts[1].strip() if len(parts) > 1 else None
            # Missing or empty values are stored as NaN.
            header_comments[variable] = value if value else np.nan

        logging.debug(" Headers Dict ")
        logging.debug(header_comments)

        headers = dict()
        headers["start_time"] = parse(header_comments["Start_Time"])
        headers["end_time"] = parse(header_comments["End_Time"])
        self.pec_settings = headers

    def _rename_headers(self):
        """Rename all columns listed in ``pec_headers_normal`` to cellpy names."""
        logging.debug("Trying to rename the columns")
        for key, pec_name in pec_headers_normal.items():
            self._rename_header(key, pec_name)

    def _convert_units(self):
        """Convert dtypes and rescale the raw data.

        The date columns are parsed, "Rack" is made categorical, the time
        columns are converted to seconds, and voltage, current, capacities
        and energies are multiplied by their unit-fractions.
        """
        logging.debug("Trying to convert all data into correct units")
        logging.debug("- dtypes")
        self.pec_data[self.headers_normal.datetime_txt] = pd.to_datetime(
            self.pec_data[self.headers_normal.datetime_txt]
        )
        self.pec_data["Position_Start_Time"] = pd.to_datetime(
            self.pec_data["Position_Start_Time"]
        )
        self.pec_data["Rack"] = self.pec_data["Rack"].astype("category")

        logging.debug("- cellpy units")
        pec_units = self._get_pec_units()
        pec_times = self._get_pec_times()
        raw_units = self._raw_units_for_internal_calculations()

        # The two calls above may have changed pec_headers_normal, so the
        # renaming is repeated to catch columns missed on the first pass.
        self._rename_headers()

        _v = pec_units["voltage"] / raw_units["voltage"]
        _i = pec_units["current"] / raw_units["current"]
        _c = pec_units["charge"] / raw_units["charge"]
        _w = pec_units["energy"] / raw_units["energy"]

        # Time is either given in a unit proportional to seconds (numeric
        # factor) or in the hh:mm:ss.xxx format (callable converter). Each
        # time variable is converted in its own column.
        time_columns = {
            "total_time": self.headers_normal.test_time_txt,
            "step_time": self.headers_normal.step_time_txt,
        }
        for var, hdr in time_columns.items():
            converter = pec_times[var]
            if isinstance(converter, (int, float)):
                self.pec_data[hdr] *= converter / raw_units["time"]
            elif callable(converter):
                self.pec_data[hdr] = self.pec_data[hdr].apply(converter)

        self.pec_data[self.headers_normal.voltage_txt] *= _v
        self.pec_data[self.headers_normal.current_txt] *= _i
        self.pec_data[self.headers_normal.charge_capacity_txt] *= _c
        self.pec_data[self.headers_normal.discharge_capacity_txt] *= _c
        self.pec_data[self.headers_normal.charge_energy_txt] *= _w
        self.pec_data[self.headers_normal.discharge_energy_txt] *= _w

    def _rename_header(self, h_old, h_new):
        """Rename the column ``h_new`` to the cellpy header stored under ``h_old``.

        Args:
            h_old: key in ``self.cellpy_headers`` giving the target name.
            h_new: current name of the column in ``self.pec_data``.
        """
        try:
            self.pec_data.rename(
                columns={h_new: self.cellpy_headers[h_old]}, inplace=True
            )
        except KeyError as e:
            logging.info(f"Problem during conversion to cellpy-format ({e})")

    def _find_header_length(self):
        """Count the lines in front of the data table.

        The table is taken to start at the first line with more than one
        delimiter that is not inside the RESULTS CHECK section.

        Returns:
            int: number of lines to skip when reading the table.
        """
        skiprows = 0
        resultscheck = False  # True while inside the RESULTS CHECK section
        with open(self.temp_file_path, "r") as header:
            for line in header:
                if line in self.fake_header_length:
                    resultscheck = not resultscheck
                if not resultscheck and line.count(self.pec_file_delimiter) > 1:
                    break
                skiprows += 1
        return skiprows

    @staticmethod
    def timestamp_to_seconds(timestamp):
        """Convert a ``hh:mm:ss.xxx`` timestamp to seconds.

        The hour field may have any number of digits (tests lasting more than
        24 or 100 hours are handled) and the fractional part of the seconds is
        optional.

        Args:
            timestamp (str): elapsed time given as hours:minutes:seconds.

        Returns:
            float: the elapsed time in seconds.

        Raises:
            ValueError: if the timestamp does not consist of three
                ":"-separated numeric fields.
        """
        # Local import: the module only imports ``datetime`` from datetime.
        from datetime import timedelta

        hours, minutes, seconds = timestamp.split(":")
        elapsed = timedelta(
            hours=int(hours), minutes=int(minutes), seconds=float(seconds)
        )
        return elapsed.total_seconds()
if __name__ == "__main__":
    # Nothing to run when the module is executed as a script.
    pass