Source code for cellpy.utils.batch_tools.batch_core

import abc
import collections
import logging
import os
import random

from cellpy import cellreader, prms
from cellpy.exceptions import UnderDefined
from cellpy.parameters.internal_settings import get_headers_journal
from cellpy.utils.batch_tools import batch_helpers as helper

#  import box


# Journal header definitions, fetched once at import time. Attributes of this
# object (e.g. ``hdr_journal.cellpy_file_name``) are used in this module as
# column labels when indexing ``journal.pages``.
hdr_journal = get_headers_journal()
# Module-level empty list used as a placeholder farm.
# NOTE(review): this is one single list object; mutating it in place would be
# visible through every reference to it.
empty_farm = []


class Doer(metaclass=abc.ABCMeta):
    """Base class for all the classes that do something to the experiment(s).

    Attributes:
        experiments: list of experiments.
        farms: list of farms (one pr experiment) (containing pandas DataFrames).
        engines: list of callables run by ``do`` (the engines create the animals).
        dumpers: list of callables run after each engine (the dumpers place
            the animals in the barn).
        barn (str): identifier for where to place the output-files (i.e. the
            animals) (typically a directory path).
        locked (bool): if True, ``empty_the_farms`` leaves the farms untouched.

    The do-er iterates through all the connected engines and dumpers (the
    dumpers are run for each engine).

    It is the responsibility of the engines and dumpers to iterate through
    the experiments. The most natural way is to work with just one experiment.
    """

    def __init__(self, *args):
        """Setting up the Do-er.

        Args:
            *args: list of experiments

        Raises:
            TypeError: if any of the arguments is not a ``BaseExperiment``.
        """
        self.experiments = []
        # A list of lists, each list is a green field where your animals
        # wander around
        self.farms = []
        self.engines = []  # The engines creates the animals
        self.dumpers = []  # The dumpers places animals in the barn
        # This is where we put the animals during winter (and in the night)
        self.barn = None

        # Decide if the farm should be locked or not. If not locked, the farm
        # will be emptied before each engine run (if the farm is not locked,
        # the animals will escape). Typically, you would not want to lock the
        # farm. Remark that also the engines have access to the farms (gets
        # the farm as input and sends a modified version back), and most of
        # them empties the farm before populating them with new content anyway:
        self.locked = False

        args = self._validate_base_experiment_type(args)
        if args:
            self.experiments.extend(args)
            # One fresh farm per experiment. A new list is created for each
            # one so that no two farms (or two Doer instances) end up sharing
            # the same mutable list object.
            self.farms.extend([] for _ in args)

    def _assign_engine(self, engine):
        """Register an engine (callable) to be run by ``do``."""
        self.engines.append(engine)

    def _assign_dumper(self, dumper):
        """Register a dumper (callable) to be run after each engine."""
        self.dumpers.append(dumper)

    @abc.abstractmethod
    def run_engine(self, engine, **kwargs):
        """Set the current_engine and run it.

        The method sets and engages the engine (callable) and provide
        appropriate binding to at least the class attributes self.farms and
        self.barn.

        Example:
            self.current_engine = engine
            self.farms, self.barn = engine(experiments=self.experiments,
                                           farms=self.farms, **kwargs)

        Args:
            engine (callable): the function that should be called.
            **kwargs: additional keyword arguments sent to the callable.
        """
        pass

    @abc.abstractmethod
    def run_dumper(self, dumper):
        """Run one dumper (callable); must be implemented by subclasses."""
        pass

    def __str__(self):
        return f"({self.__class__.__name__})"

    def __repr__(self):
        return self.__class__.__name__

    @staticmethod
    def _validate_base_experiment_type(args):
        """Check that all items in args are ``BaseExperiment`` instances.

        Returns:
            ``None`` if args is empty, otherwise args unchanged.

        Raises:
            TypeError: on the first item that is not a ``BaseExperiment``.
        """
        if len(args) == 0:
            return None

        for arg in args:
            if not isinstance(arg, BaseExperiment):
                err = f"{repr(arg)} is not an instance of BaseExperiment"
                raise TypeError(err)
        return args

    def info(self):
        """Delivers some info to you about the class."""
        print("Sorry, but I don't have much to share.")
        print("This is me:")
        print(self)
        print("And these are the experiments assigned to me:")
        print(self.experiments)

    def assign(self, experiment):
        """Assign an experiment."""
        self.experiments.append(experiment)
        # Fresh list: each experiment gets its own farm (never a shared one).
        self.farms.append([])

    def empty_the_farms(self):
        """Free all the farms for content (empty all lists)."""
        if not self.locked:
            logging.debug("emptying the farm for all the pandas")
            # Rebuild with new lists; the number of farms is preserved.
            self.farms = [[] for _ in self.farms]

    def do(self, **kwargs):
        """Do what is needed and dump it for each engine.

        Args:
            **kwargs: forwarded to ``run_engine`` for every engine.

        Raises:
            UnderDefined: if no experiment has been assigned.
        """
        if not self.experiments:
            raise UnderDefined("cannot run until you have assigned an experiment")

        for engine in self.engines:
            self.empty_the_farms()
            logging.debug(f"running - {str(engine)}")
            self.run_engine(engine, **kwargs)

            # All dumpers are run once after each engine.
            for dumper in self.dumpers:
                logging.debug(f"exporting - {str(dumper)}")
                self.run_dumper(dumper)
class Data(collections.UserDict):
    """Class that is used to access the experiment.journal.pages DataFrame.

    The Data class loads the complete cellpy-file if raw-data is not already
    loaded in memory. In future version, it could be that the Data object
    will return a link allowing querying instead to save memory usage...

    Remark that some cellpy (cellreader.CellpyCell) function might not work if
    you have the raw-data in memory, but not summary data (if the cellpy
    function requires summary data or other settings not set as default).

    Cells can be reached either by item access (``data[cell_label]``) or by
    attribute access using the accessor prefix (``data.x_<cell_label>``).
    """

    # TODO (jepe): decide if we should included querying functionality here.
    # TODO (jepe): implement experiment.last_cycle
    # TODO (jepe): consider renaming for v1.0.0 (Cell will be renamed to Data).

    def __init__(self, experiment, *args):
        """Set up the accessors for the cells in the given experiment.

        Args:
            experiment: object providing ``journal.pages``,
                ``cell_data_frames`` and ``_load_cellpy_file``.
            *args: passed on to ``collections.UserDict``.
        """
        super().__init__(*args)
        self.experiment = experiment
        self.query_mode = False
        self.accessor_pre = "x_"
        self.accessors = {}
        self._create_accessors()

    def _create_accessor_label(self, cell_label):
        """Return the attribute name (prefix + cell label) for a cell."""
        return self.accessor_pre + cell_label

    def _create_cell_label(self, accessor_label):
        """Return the cell label for an accessor label (inverse of above).

        Only the exact prefix is removed, and only once. ``str.lstrip`` must
        not be used here: it strips a *set of characters*, so it would also
        eat leading ``x``/``_`` characters that belong to the cell label.
        """
        prefix = self.accessor_pre
        if accessor_label.startswith(prefix):
            return accessor_label[len(prefix):]
        return accessor_label

    def _create_accessors(self):
        """Register an accessor for every journal cell that has loaded data."""
        cell_labels = self.experiment.journal.pages.index
        for cell_label in cell_labels:
            try:
                self.accessors[
                    self._create_accessor_label(cell_label)
                ] = self.experiment.cell_data_frames[cell_label]
            except KeyError as e:
                logging.debug(
                    f"Could not create accessors for {cell_label}"
                    f"(probably missing from the experiment.cell_data_frames"
                    f"attribute) {e}"
                )

    def __getitem__(self, cell_id):
        cellpy_data_object = self.__look_up__(cell_id)
        return cellpy_data_object

    def __dir__(self):
        # This is the secret sauce that allows jupyter to do tab-completion
        return self.accessors

    def __str__(self):
        t = ""
        if not self.experiment.cell_data_frames:
            t += "{}"
        else:
            for k in self.experiment.cell_data_frames:
                t += f"'{k}'\n"
                t += str(self.experiment.cell_data_frames[k])
                t += "\n"
        t += "\n"
        return t

    def __getattr__(self, item):
        # __getattr__ is only called when normal attribute lookup fails.
        # Read ``accessors`` straight from the instance dict: going through
        # ``self.accessors`` would re-enter __getattr__ (infinite recursion)
        # whenever the attribute is not set yet, e.g. while the object is
        # being re-created by copy.deepcopy or pickle.
        accessors = self.__dict__.get("accessors", ())
        if item in accessors:
            item = self._create_cell_label(item)
            return self.__getitem__(item)
        else:
            return super().__getattribute__(item)

    def __look_up__(self, cell_id):
        """Return the cell object for cell_id, loading it from file if needed.

        If the cell is registered in ``experiment.cell_data_frames`` and its
        raw data is non-empty, it is returned directly. If it is registered
        but lacks (non-empty) raw data, the complete cellpy file given in the
        journal pages is loaded and cached.

        Raises:
            KeyError: if cell_id is not in ``experiment.cell_data_frames``.
            NotImplementedError: if ``query_mode`` is set.
        """
        try:
            if not self.experiment.cell_data_frames[cell_id].data.raw.empty:
                return self.experiment.cell_data_frames[cell_id]
            else:
                # Empty raw data is treated the same way as missing raw data.
                raise AttributeError

        except AttributeError:
            logging.debug("Need to do a look-up from the cellpy file")
            # last_cycle = self.experiment.max_cycle
            pages = self.experiment.journal.pages
            info = pages.loc[cell_id, :]
            cellpy_file = str(info[hdr_journal.cellpy_file_name])
            # linking (query_mode) not implemented yet - loading whole file
            # in mem instead
            if not self.query_mode:
                # TODO: modify _load_cellpy_file so that it can select parts
                #  of the data (max_cycle etc)
                cell = self.experiment._load_cellpy_file(cellpy_file)  # noqa
                self.experiment.cell_data_frames[cell_id] = cell
                # trick for making tab-completion work:
                self.accessors[
                    self._create_accessor_label(cell_id)
                ] = self.experiment.cell_data_frames[cell_id]
                return cell
            else:
                raise NotImplementedError

    def sample(self):
        """Pick out one random cell from the batch"""
        cell_labels = self.experiment.journal.pages.index
        cell_id = random.choice(cell_labels)
        return self.__look_up__(cell_id)

    def first(self):
        """Pick out first cell from the batch"""
        cell_labels = self.experiment.journal.pages.index
        cell_id = cell_labels[0]
        return self.__look_up__(cell_id)

    def last(self):
        """Pick out last cell from the batch"""
        cell_labels = self.experiment.journal.pages.index
        cell_id = cell_labels[-1]
        return self.__look_up__(cell_id)
class BaseExperiment(metaclass=abc.ABCMeta):
    """An experiment contains experimental data and meta-data.

    Iterating over an experiment yields one cell object per row in
    ``journal.pages`` (in index order). Iteration state is stored on the
    instance itself, so nested/simultaneous iteration over the same
    experiment object is not supported.
    """

    def __init__(self, *args):
        self.journal = None
        self.summary_frames = None
        self.cell_data_frames = dict()
        self.memory_dumped = dict()
        self.parent_level = "CellpyCell"
        self.log_level = "CRITICAL"
        self._data = None
        self._store_data_object = True
        self._cellpy_object = None
        self.limit = 10
        self._max_cycle = None
        # Iterator state (reset by __iter__). Initialised here so that calling
        # next() before iter() ends cleanly instead of raising AttributeError.
        self._counter = 0
        self._limit = 0

    def __str__(self):
        return (
            f"[{self.__class__.__name__}]\n"
            f"journal: \n{str(self.journal)}\n"
            f"data: \n{str(self.data)}"
        )

    def __repr__(self):
        return self.__class__.__name__

    def __len__(self):
        """Return the number of cells in the journal (0 if there is none)."""
        try:
            length = len(self.journal.pages.index)
        except (TypeError, AttributeError):
            # AttributeError: the journal (or its pages) is still None.
            length = 0
        return length

    def __iter__(self):
        self._counter = 0
        self._limit = len(self)
        return self

    def __next__(self):
        counter = self._counter
        limit = self._limit
        if counter >= limit:
            raise StopIteration
        else:
            self._counter += 1
            cell_label = self.journal.pages.index[counter]
            try:
                logging.debug(f"looking for cell {cell_label}")
                cellpy_object = self.data[cell_label]
            except (TypeError, KeyError):
                logging.debug("There is no data available - trying to link")
                try:
                    self._link_cellpy_file(cell_label)
                    cellpy_object = self.data[cell_label]
                except (IOError, KeyError, UnderDefined):
                    # Best effort: a cell that cannot be linked ends the
                    # iteration instead of raising an error.
                    raise StopIteration
            return cellpy_object

    def _link_cellpy_file(self, cell_label, max_cycle=None):
        """Create a CellpyCell object holding only the step-table of a cell.

        The object is stored in ``self.cell_data_frames[cell_label]`` and the
        cached data object is reset.

        Args:
            cell_label: index label of the cell in ``journal.pages``.
            max_cycle: passed on to the step-table look-up; if truthy it is
                also stored as ``self.max_cycle``.

        Raises:
            IOError: if the cellpy file given in the journal is not a file.
            UnderDefined: if the step-table is empty.
        """
        # creates a CellpyCell object and loads only the step-table
        logging.debug("linking cellpy file")
        cellpy_file_name = self.journal.pages.loc[
            cell_label, hdr_journal.cellpy_file_name
        ]
        if not os.path.isfile(cellpy_file_name):
            raise IOError(f"cellpy file not found: {cellpy_file_name}")

        cellpy_object = cellreader.CellpyCell(initialize=True)
        step_table = helper.look_up_and_get(
            cellpy_file_name, prms._cellpyfile_step, max_cycle=max_cycle
        )
        if step_table.empty:
            raise UnderDefined
        if max_cycle:
            cellpy_object.overwrite_able = False
            self.max_cycle = max_cycle
        cellpy_object.data.steps = step_table
        # Force the data property to build a new Data object on next access.
        self._data = None
        self.cell_data_frames[cell_label] = cellpy_object

    def _load_cellpy_file(self, file_name):
        """Load a cellpy file into a new CellpyCell object and return it.

        If ``self.max_cycle`` is set, it is passed on as a selector and the
        returned object is flagged as not overwrite-able.
        """
        # TODO: modify this so that it can select parts of the data
        #  (max_cycle etc)
        selector = dict()
        cellpy_data = cellreader.CellpyCell()
        if self.max_cycle:
            cellpy_data.overwrite_able = False
            selector["max_cycle"] = self.max_cycle
        cellpy_data.load(file_name, self.parent_level, selector=selector)
        logging.info(f" <- grabbing ( {file_name} )")
        return cellpy_data

    @property
    def max_cycle(self):
        return self._max_cycle

    @max_cycle.setter
    def max_cycle(self, value):
        self._max_cycle = value

    @property
    def data(self):
        """Property for accessing the underlying data in an experiment.

        Example:
            >>> cell_data_one = experiment.data["2018_cell_001"]
            >>> capacity, voltage = cell_data_one.get_cap(cycle=1)
        """
        # TODO: implement max cycle number (experiment.last_cycle)
        if self._data is None:
            data = Data(self)
            if self._store_data_object:
                # for cell_name in self.journal.pages.index:
                #     data[cell_name] = None
                self._data = data
            return data
        else:
            return self._data

    @abc.abstractmethod
    def update(self):
        """Get or link data."""
        pass

    def status(self):
        """Describe the status and health of your experiment."""
        raise NotImplementedError

    def info(self):
        """Print information about the experiment."""
        print(self)
class BaseJournal:
    """A journal keeps track of the details of the experiment.

    The journal should at a minimum contain information about the name and
    project the experiment has.

    Attributes:
        pages (pandas.DataFrame): table with information about each cell/file.
        name (str): the name of the experiment (used in db-lookup).
        project(str): the name of the project the experiment belongs to (used
            for making folder names).
        file_name (str or path): the file name used in the to_file method.
        project_dir: folder where to put the batch (or experiment) files and
            information.
        batch_dir: folder in project_dir where summary-files and information
            and results related to the current experiment are stored.
        raw_dir: folder in batch_dir where cell-specific information and
            results are stored (e.g. raw-data, dq/dv data, voltage-capacity
            cycles).
    """

    # Names of the attributes that _prm_packer collects into a dictionary.
    packable = ["name", "project", "time_stamp", "project_dir", "batch_dir", "raw_dir"]

    def __init__(self):
        self.pages = None  # pandas.DataFrame
        self.session = None  # dictionary
        self.name = None
        self.project = None
        self.file_name = None  # This is the file-path to the "True" journal file
        self.time_stamp = None
        self.project_dir = None
        self.batch_dir = None
        self.raw_dir = None

    def __str__(self):
        rows = [
            f"({self.__class__.__name__})",
            f" - name: {self.name!s}",
            f" - project: {self.project!s}",
            f" - file_name: {self.file_name!s}",
            f" - pages: ->\n{self.pages!s}",
            f" - session: ->\n{self.session!s}",
            " <-",
        ]
        return "\n".join(rows) + "\n"

    def __repr__(self):
        return type(self).__name__

    def _prm_packer(self, metadata=None):
        """Pack or unpack the journal meta-data.

        Without arguments, a dictionary with the current values of all the
        ``packable`` attributes is returned. When a metadata mapping is given,
        its values are instead written to the journal attributes of the same
        name (keys that are not attributes of the journal are only logged)
        and nothing is returned.
        """
        if metadata is not None:
            for key in metadata:
                if not hasattr(self, key):
                    logging.debug(f"unknown variable encountered: {key}")
                    continue
                setattr(self, key, metadata[key])
            return None

        return {attr: getattr(self, attr) for attr in self.packable}

    def from_db(self):
        """Make journal pages by looking up a database.

        Default to using the simple excel "database" provided by cellpy.

        If you don't have a database, or you don't know how to make and use
        one, look in the cellpy documentation for other solutions (e.g.
        manually create a file that can be loaded by the ``from_file``
        method).
        """
        logging.debug("not implemented")

    def from_file(self, file_name):
        """Load journal pages from a file (not implemented in the base class)."""
        raise NotImplementedError

    def create(self):
        """Create a journal manually"""
        raise NotImplementedError

    def to_file(self, file_name=None):
        """Save journal pages to a file.

        The file can then be used in later sessions using the `from_file`
        method.
        """
        raise NotImplementedError

    def paginate(self):
        """Create folders used for saving the different output files."""
        raise NotImplementedError

    def generate_file_name(self):
        """Create a file name for saving the journal."""
        logging.debug("not implemented")
# Do-ers
class BaseExporter(Doer, metaclass=abc.ABCMeta):
    """An exporter exports your data to a given format."""

    def __init__(self, *args):
        """Set up the exporter.

        Args:
            *args: list of experiments (passed on to ``Doer``).
        """
        super().__init__(*args)
        self._use_dir = None
        self.current_engine = None

    def run_engine(self, engine, **kwargs):
        """Set the current engine, run it and store the farms and the barn.

        Args:
            engine (callable): called with ``experiments`` and ``farms`` as
                keyword arguments; must return the tuple ``(farms, barn)``.
            **kwargs: additional keyword arguments sent to the engine.
        """
        logging.debug(f"start engine::{engine.__name__}")
        self.current_engine = engine
        # Forward the keyword arguments to the engine instead of silently
        # dropping them.
        self.farms, self.barn = engine(
            experiments=self.experiments, farms=self.farms, **kwargs
        )
        logging.debug("::engine ended")

    def run_dumper(self, dumper):
        """Run the dumper on the current farms and barn.

        Args:
            dumper (callable): called with ``experiments``, ``farms``,
                ``barn`` and ``engine`` (the current engine) as keyword
                arguments.
        """
        logging.debug(f"start dumper::{dumper.__name__}")
        dumper(
            experiments=self.experiments,
            farms=self.farms,
            barn=self.barn,
            engine=self.current_engine,
        )
        logging.debug("::dumper ended")
class BasePlotter(Doer, metaclass=abc.ABCMeta):
    """Abstract base class for plotters.

    Concrete plotters must implement both ``run_engine`` and ``run_dumper``.
    """

    def __init__(self, *args):
        super().__init__(*args)

    @abc.abstractmethod
    def run_engine(self, engine, **kwargs):
        """Run one engine (callable); to be implemented by the subclass."""

    @abc.abstractmethod
    def run_dumper(self, dumper):
        """Run one dumper (callable); to be implemented by the subclass."""
class BaseReporter(Doer, metaclass=abc.ABCMeta):
    """Abstract base class for reporters.

    Concrete reporters must implement both ``run_engine`` and ``run_dumper``.
    """

    def __init__(self, *args):
        super().__init__(*args)

    @abc.abstractmethod
    def run_engine(self, engine, **kwargs):
        """Run one engine (callable); to be implemented by the subclass.

        The signature accepts ``**kwargs`` because ``do`` invokes this method
        as ``self.run_engine(engine, **kwargs)``.
        """
        pass

    @abc.abstractmethod
    def run_dumper(self, dumper):
        """Run one dumper (callable); to be implemented by the subclass."""
        pass
class BaseAnalyzer(Doer, metaclass=abc.ABCMeta):
    """Base class for analyzers.

    Provides default ``run_engine`` and ``run_dumper`` implementations that
    call the engine/dumper with the experiments, farms and barn.
    """

    def __init__(self, *args):
        """Set up the analyzer.

        Args:
            *args: list of experiments (passed on to ``Doer``).
        """
        super().__init__(*args)
        self.current_engine = None

    def run_engine(self, engine, **kwargs):
        """Run the engine, build the barn and put the animals on the farm

        Args:
            engine (callable): called with ``experiments`` and ``farms`` as
                keyword arguments; must return the tuple ``(farms, barn)``.
            **kwargs: additional keyword arguments sent to the engine.
        """
        logging.debug(f"start engine::{engine.__name__}")
        self.current_engine = engine
        self.farms, self.barn = engine(
            experiments=self.experiments, farms=self.farms, **kwargs
        )
        logging.debug("::engine ended")

    def run_dumper(self, dumper):
        """Place the animals in the barn

        Args:
            dumper (callable): called with ``experiments``, ``farms``,
                ``barn`` and ``engine`` (the current engine) as keyword
                arguments.
        """
        logging.debug(f"start dumper::{dumper.__name__}")
        dumper(
            experiments=self.experiments,
            farms=self.farms,
            barn=self.barn,
            engine=self.current_engine,
        )
        logging.debug("::dumper ended")