# Source code for cellpy.utils.batch_tools.sqlite_from_excel_db

import logging
import pathlib
import sqlite3
import sys
from dataclasses import dataclass
from pprint import pprint

import pandas as pd
import sqlalchemy as sa

import cellpy
from cellpy import prms
from cellpy.parameters.internal_settings import (
    TABLE_NAME_SQLITE,
    COLUMNS_RENAMER,
)


# Module-level defaults, read once from the cellpy parameters at import time.
# They are used as default arguments by load_xlsx / save_sqlite and by run().

# File name of the Excel database (default ``db_file`` of load_xlsx).
DB_FILE_EXCEL = prms.Paths.db_filename

# File name of the SQLite database (default ``out_file`` of save_sqlite).
DB_FILE_SQLITE = prms.Db.db_file_sqlite

# Name of the Excel sheet that load_xlsx parses.
TABLE_NAME_EXCEL = prms.Db.db_table_name

# Row that load_xlsx passes to pandas as the header row.
HEADER_ROW = prms.Db.db_header_row

# Row that load_xlsx skips while parsing (presumably the units row).
UNIT_ROW = prms.Db.db_unit_row



@dataclass
class DbColsRenamer:
    """Naming and type information for a single database column.

    Instances are built by ``create_column_names_from_prms`` and consumed by
    ``clean_up``.

    Attributes:
        cellpy_col: attribute name of the column in ``cellpy.prms.DbCols``.
        dtype: type tag for the column; ``clean_up`` acts on the values
            ``"int"``, ``"float"`` and ``"str"``.
        excel_col: header of the column in the Excel sheet.
        db_col: name the column gets in the cleaned-up dataframe.
    """

    cellpy_col: str = ""
    dtype: str = ""
    excel_col: str = ""
    db_col: str = ""





def create_column_names_from_prms():
    """Build one DbColsRenamer per key of ``cellpy.prms.DbCols``.

    For every key the Excel header is read from ``cellpy.prms.DbCols`` and the
    type tag from ``cellpy.prms._db_cols_unit``. The database column name is
    looked up in ``COLUMNS_RENAMER`` and falls back to the key itself.

    Returns:
        list of DbColsRenamer, in the order the keys are yielded.
    """
    db_cols = cellpy.prms.DbCols
    logging.debug(db_cols.keys())
    logging.debug("----")
    names = db_cols.keys()
    type_tags = cellpy.prms._db_cols_unit

    renamers = []
    for name in names:
        renamer = DbColsRenamer(
            cellpy_col=name,
            dtype=getattr(type_tags, name),
            excel_col=getattr(db_cols, name),
            # Keys without an explicit mapping keep their own name.
            db_col=COLUMNS_RENAMER[name] if name in COLUMNS_RENAMER else name,
        )
        renamers.append(renamer)
        logging.debug(renamer)
        logging.debug("----")
    return renamers




def load_xlsx(
    db_file=DB_FILE_EXCEL,
    table_name=TABLE_NAME_EXCEL,
    header_row=HEADER_ROW,
    unit_row=UNIT_ROW,
):
    """Load one sheet of the Excel database into a pandas dataframe.

    Args:
        db_file: path of the Excel workbook (opened with the openpyxl engine).
        table_name: sheet to parse.
        header_row: row passed to pandas as ``header``.
        unit_row: row passed to pandas in ``skiprows`` so it is left out.

    Returns:
        The parsed sheet, as returned by ``pandas.ExcelFile.parse``.
    """
    # ExcelFile holds the workbook open; the context manager releases the
    # file handle even when parsing fails.
    with pd.ExcelFile(db_file, engine="openpyxl") as work_book:
        return work_book.parse(
            table_name, header=header_row, skiprows=[unit_row]
        )




def save_sqlite(
    sheet, out_file=DB_FILE_SQLITE, table_name=TABLE_NAME_SQLITE, set_index=False
):
    """Save the pandas dataframe to a sqlite database.

    Args:
        sheet: dataframe to write.
        out_file: path of the sqlite file; used to build a ``sqlite:///`` URI.
        table_name: table to write to; an existing table is replaced.
        set_index: if True, the column named ``COLUMNS_RENAMER["id"]`` is
            made the index of the dataframe before writing.
    """
    uri = f"sqlite:///{out_file}"
    logging.debug(f"Saving to sqlite ({uri})...")
    engine = sa.create_engine(uri, echo=False)
    try:
        if set_index:
            sheet = sheet.set_index(COLUMNS_RENAMER["id"])
        sheet.to_sql(table_name, con=engine, if_exists="replace")
    finally:
        # Close the pooled connection(s) so the database file is released,
        # also when writing fails.
        engine.dispose()




def _decimal_commas_to_points(series):
    """Return *series* with "," replaced by "." in its string entries only."""
    if pd.api.types.is_numeric_dtype(series):
        # A numeric column has no strings to fix.
        return series
    # Series.str.replace would turn every non-string entry of a mixed column
    # into NaN, so the replacement is applied element-wise to strings only.
    return series.map(
        lambda value: value.replace(",", ".") if isinstance(value, str) else value
    )


def clean_up(df, columns):
    """Clean up the dataframe and return using 'proper cellpy headers'.

    Columns listed in ``columns`` that exist in ``df`` are converted according
    to their ``dtype`` tag, renamed from ``excel_col`` to ``db_col`` and
    selected; all other columns of ``df`` are dropped.

    Conversions per ``dtype`` tag:
        * ``"int"``: missing values become 0, decimal commas in strings
          become points, then the column is cast to int.
        * ``"float"``: as for ``"int"``, but cast to float; for the column
          whose ``cellpy_col`` is ``"temperature"`` the value ``"RT"`` is
          replaced by 25 first.
        * ``"str"``: missing values become ``""`` and the column is cast to
          str.
        * any other tag: the column is kept unconverted.

    Args:
        df: dataframe as loaded from the Excel sheet; it is not modified.
        columns: iterable of objects with the attributes ``excel_col``,
            ``cellpy_col``, ``dtype`` and ``db_col`` (e.g. DbColsRenamer).

    Returns:
        A new dataframe with the selected columns named by ``db_col``, in
        the order given by ``columns``.
    """
    logging.debug("Cleaning up ...")
    logging.debug(" converting ...")

    # Work on a copy so the caller's dataframe is never altered.
    df = df.copy()

    final_columns = {}
    for col in columns:
        excel_col = col.excel_col
        cellpy_col = col.cellpy_col
        t = col.dtype
        db_col = col.db_col
        if excel_col not in df.columns:
            logging.debug(f"  {excel_col} not in df.columns")
            continue
        logging.debug(
            f"  {cellpy_col} = {excel_col} [{df[excel_col].dtype}]:({t}) --> "
        )
        values = df[excel_col]
        if t == "int":
            values = _decimal_commas_to_points(values.fillna(0))
            values = values.astype("int")
        elif t == "float":
            values = _decimal_commas_to_points(values.fillna(0))
            # Compare the column's cellpy name, not the renamer object itself.
            if cellpy_col == "temperature":
                values = values.replace("RT", 25)
            values = values.astype("float")
        elif t == "str":
            values = values.fillna("").astype("str")
        df[excel_col] = values
        logging.debug(f"[{df[excel_col].dtype}]")
        df = df.rename(columns={excel_col: db_col})
        final_columns[
            db_col
        ] = db_col  # modify this if you want to rename columns again
    logging.debug("Selecting...")
    df = df[list(final_columns)]
    logging.debug("Renaming to cellpy names...")
    df = df.rename(columns=final_columns)
    return df




def run():
    """Convert the Excel database named in the cellpy parameters to sqlite.

    Both files are located in ``cellpy.prms.Paths.db_path``: the Excel file is
    ``cellpy.prms.Paths.db_filename`` and the sqlite file is DB_FILE_SQLITE.
    """
    db_dir = cellpy.prms.Paths.db_path
    excel_path = pathlib.Path(db_dir) / cellpy.prms.Paths.db_filename
    sqlite_path = pathlib.Path(db_dir) / DB_FILE_SQLITE

    renamers = create_column_names_from_prms()
    sheet = load_xlsx(db_file=excel_path)
    cleaned = clean_up(sheet, columns=renamers)
    save_sqlite(cleaned, out_file=sqlite_path)




def main():
    """Command-line entry point: convert the given Excel database to sqlite.

    The path of the Excel file is taken from the first command-line argument.
    The sqlite file DB_FILE_SQLITE is written to ``cellpy.prms.Paths.db_path``.

    Raises:
        SystemExit: with code 2 if no argument is given, with code 1 if the
            given file does not exist.
    """
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError.
        print(
            "Usage: python -m cellpy.utils.batch_tools.sqlite_from_excel_db "
            "<excel-db-file>"
        )
        sys.exit(2)
    db_exel_file = pathlib.Path(sys.argv[1])
    if not db_exel_file.exists():
        print(f"File not found: {db_exel_file}")
        sys.exit(1)
    db_sqlite_file = pathlib.Path(cellpy.prms.Paths.db_path) / DB_FILE_SQLITE
    columns = create_column_names_from_prms()
    df = load_xlsx(db_file=db_exel_file)
    df = clean_up(df, columns=columns)
    save_sqlite(df, out_file=db_sqlite_file)



def _check():
    """Developer check: convert a hard-coded local workbook and print details.

    Prints the configured database path and file name, then converts
    ``2022_Cell_Analysis_db_001.xlsx`` (resolved against the current working
    directory) instead and saves it with the default sqlite settings.
    """
    print("Settings:")
    print(f"cellpy.prms.Paths.db_path={cellpy.prms.Paths.db_path!r}")
    print(f"cellpy.prms.Paths.db_filename={cellpy.prms.Paths.db_filename!r}")

    print("But choosing:")
    workbook_path = pathlib.Path("2022_Cell_Analysis_db_001.xlsx").resolve()
    print(f"db_exel_file={workbook_path!r}")

    renamers = create_column_names_from_prms()
    cleaned = clean_up(load_xlsx(db_file=workbook_path), columns=renamers)
    print("cleaned up:")
    print(cleaned.columns)
    save_sqlite(cleaned)


# Run the command-line conversion only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()