Source code for timelink.app.backend.timelink_webapp

""" Main class for the Timelink web application. """

import os
import json
from typing import List

import pandas
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from sqlalchemy.engine.url import make_url

import timelink
from timelink.api.database import get_postgres_dbnames, get_sqlite_databases
from timelink.app.schemas.project import ProjectSchema
from timelink.kleio.kleio_server import KleioServer
from timelink.app.models import UserDatabase, User, UserProperty  # noqa
from timelink.app.models.project import Project



[docs]
class TimelinkWebApp:
    """Entry point used by a FastAPI web application to reach Timelink.

    An instance keeps a users database (``UserDatabase``), a ``KleioServer``
    and the list of known projects. The class-level URL attributes below are
    placeholders for the Fief (user management) login/logout flow and are
    ``None`` until something else assigns them.

    Attributes:
        app_name (str): Name of the application.
        timelink_home (str): Timelink home directory; the ``projects`` and
            (by default) ``system/db/sqlite`` sub directories are resolved
            relative to it.
        host_url (str): URL of the Timelink web application.
        kleio_server (KleioServer): A KleioServer instance.
        kleio_version (str): Version of the Kleio image requested.
        kleio_token (str): Admin token passed to the Kleio server at start-up.
        kleio_update (bool): Passed as ``update`` when the Kleio server is started.
        users_db_type (str): Type of the users database (sqlite or postgres).
        users_db_name (str): Name of the users database.
        users_db (UserDatabase): A UserDatabase instance.
        kleio_image (str): Name of the Kleio image to use.
        postgres_image (str): Name of the postgres image to use.
        postgres_version (str): Version of the postgres image to use.
        sqlite_dir (str): Directory where the sqlite databases are located.
        stop_duplicates (bool): If True, stop other kleio servers for the same timelink home.
        initial_users (list): Deprecated; forwarded to the users database.
        projects (list): Projects known to the application.

    NOTE(review): earlier documentation also listed ``auth_manager`` and
    ``app_manager`` (URLs of the authentication and application managers);
    neither is assigned in the code visible here -- confirm whether they
    still exist.
    """
    # URL to go to once authentication has completed.
    # This should be set in a Dependency.
    after_auth_url = None
    # URL in Fief used to authenticate.
    # Must be set with fief.auth_url(redirect_uri="http://localhost:8000"),
    # see https://fief-dev.github.io/fief-python/fief_client.html#Fief.auth_url
    auth_url = None
    # URL to go to once logout has completed
    # (presumably set the same way as after_auth_url -- TODO confirm).
    after_logout_url = None
    # URL in Fief used to log out.
    # Must be set with fief.logout_url(redirect_uri="http://localhost:8000"),
    # see https://fief-dev.github.io/fief-python/fief_client.html#Fief.logout_url
    logout_url = None

    def __init__(
        self,
        app_name: str = "timelink",
        timelink_url: str = "http://localhost:8008",
        timelink_home: str = None,
        kleio_server: KleioServer = None,
        users_db_type: str = "sqlite",
        users_db_name: str = "timelink_users.sqlite",
        kleio_image=None,
        kleio_version=None,
        kleio_token=None,
        kleio_update=False,
        postgres_image=None,
        postgres_version=None,
        sqlite_dir=None,
        stop_duplicates=True,  # kleio server duplicates
        initial_users: list[User] = None,
        **connection_args,
    ):
        """Create a TimelinkWebApp instance.

        Sets up, in this order: the users database, the Kleio server
        (started only when none was supplied) and the project list
        (through :meth:`update_projects`).

        Args:
            app_name: name of the application
            timelink_url: URL of the Timelink web application
            timelink_home: Timelink home directory; when None it is looked up
                with ``KleioServer.find_local_kleio_home()``
            kleio_server: an existing KleioServer instance; when None a server
                is started with ``KleioServer.start`` for ``timelink_home``
            users_db_type: type of the users database (sqlite or postgres)
            users_db_name: name of the users database
            kleio_image: name of the Kleio image to use
            kleio_version: version of the Kleio image to use
            kleio_token: token to access the Kleio server
            kleio_update: if True, update the Kleio server
            postgres_image: name of the postgres image to use
            postgres_version: version of the postgres image to use
            sqlite_dir: directory where the sqlite databases are located;
                defaults to ``<timelink_home>/system/db/sqlite`` and is
                created if missing (sqlite only)
            stop_duplicates: if True, stop duplicates
            initial_users: list of initial users (deprecated)
            **connection_args: extra arguments forwarded to the UserDatabase
                constructor

        Raises:
            ValueError: if ``users_db_type`` is neither "sqlite" nor "postgres".
        """
        self.app_name = app_name
        self.timelink_home = timelink_home
        self.host_url = timelink_url
        self.kleio_server = kleio_server
        self.kleio_version = kleio_version
        self.users_db_type = users_db_type
        self.users_db_name = users_db_name
        self.users_db = None
        self.kleio_image = kleio_image
        self.postgres_image = postgres_image
        self.postgres_version = postgres_version
        self.sqlite_dir = sqlite_dir
        self.stop_duplicates = stop_duplicates
        # deprecated; normalised so the database always receives a list
        self.initial_users = initial_users if initial_users is not None else []
        self.kleio_token = kleio_token
        self.kleio_update = kleio_update
        self.projects: List[ProjectSchema] = []

        if self.timelink_home is None:
            self.timelink_home = KleioServer.find_local_kleio_home()

        if self.users_db_type == "sqlite":
            if self.sqlite_dir is None:
                self.sqlite_dir = os.path.join(self.timelink_home, "system/db/sqlite")
            # exist_ok avoids the check-then-create race of exists() + makedirs()
            os.makedirs(self.sqlite_dir, exist_ok=True)
            self.users_db = UserDatabase(
                db_type=self.users_db_type,
                db_name=self.users_db_name,
                db_path=self.sqlite_dir,
                stop_duplicates=self.stop_duplicates,
                initial_users=self.initial_users,
                **connection_args,
            )
        elif self.users_db_type == "postgres":
            self.users_db = UserDatabase(
                db_type=self.users_db_type,
                db_name=self.users_db_name,
                postgres_image=self.postgres_image,
                postgres_version=self.postgres_version,
                stop_duplicates=self.stop_duplicates,
                initial_users=self.initial_users,
                **connection_args,
            )
        else:
            raise ValueError(f"Invalid database type: {self.users_db_type}")

        # A caller-supplied server is kept as is; otherwise start one for
        # the home directory, when a home directory is known.
        if self.kleio_server is None and self.timelink_home is not None:
            self.kleio_server = KleioServer.start(
                kleio_home=self.timelink_home,
                kleio_image=self.kleio_image,
                kleio_version=self.kleio_version,
                kleio_admin_token=self.kleio_token,
                update=self.kleio_update,
                stop_duplicates=self.stop_duplicates,
            )
        self.update_projects()


[docs]
    def get_info(self, show_token=False, show_password=False):
        """Return a dictionary describing this Timelink Webapp object.

        Args:
            show_token: if True include the full Kleio server token;
                otherwise only its first five characters followed by "...".
            show_password: if True include database passwords; otherwise the
                password in the users database URL is masked by SQLAlchemy
                and the Postgres password entry is replaced by "...".

        Returns:
            A dict mapping human readable labels to values. Kleio entries
            beyond "Kleio server" are only present when a Kleio server is
            set; Postgres user/password entries only when the users
            database type is "postgres".
        """
        # SQLAlchemy URL objects are immutable, so the password cannot be
        # overwritten in place; render_as_string does the masking itself.
        db_url = self.users_db.engine.url.render_as_string(
            hide_password=not show_password
        )

        kserver: KleioServer = self.kleio_server

        info_dict = {
            "Timelink version": timelink.version,
            "Timelink home": self.timelink_home,
            "Timelink host URL": self.host_url,
            "Timelink users database": db_url,
            "Kleio server": kserver.get_url() if kserver is not None else None,
            "Kleio version requested": self.kleio_version,
            "SQLite directory": self.sqlite_dir,
            "Postgres image": self.postgres_image,
            "Postgres version": self.postgres_version,
        }

        if kserver is not None:
            token = kserver.get_token()
            if not show_token and token is not None:
                token = token[:5] + "..."
            info_dict.update(
                {
                    "Kleio server token": token,
                    "Kleio server URL": kserver.get_url(),
                    "Kleio server home": kserver.get_kleio_home(),
                }
            )
            container = kserver.container
            if container is not None:
                info_dict["Kleio server container"] = container.name
                # image labels are only reachable through the container
                labels = container.labels or {}
                build = labels.get("BUILD", "")
                version = labels.get("VERSION", "")
                build_date = labels.get("BUILD_DATE", "")
                if version != "":
                    info_dict["Kleio server version"] = (
                        f"{version}.{build} ({build_date})"
                    )

        if self.users_db_type == "postgres":
            # This class does not assign ``self.db`` in its constructor: use
            # it when something else attached it, otherwise fall back to the
            # users database. Missing attributes are reported as None
            # instead of raising.
            pg_db = getattr(self, "db", None)
            if pg_db is None:
                pg_db = self.users_db
            info_dict.update(
                {
                    "Postgres user": getattr(pg_db, "db_user", None),
                    "Postgres password": (
                        getattr(pg_db, "db_pwd", None) if show_password else "..."
                    ),
                }
            )
        return info_dict



[docs]
    def get_project_dirs(self):
        """List the project directory names.

        A project is any sub directory of ``<timelink_home>/projects``.

        Returns:
            The names (not full paths) of those sub directories; an empty
            list when the ``projects`` directory does not exist.
        """
        root = os.path.join(self.timelink_home, "projects")
        if not os.path.exists(root):
            return []
        found = []
        for entry in os.listdir(root):
            # plain files living next to the project directories are skipped
            if os.path.isdir(os.path.join(root, entry)):
                found.append(entry)
        return found



[docs]
    def update_projects(self) -> List[ProjectSchema]:
        """Synchronise the project list with the projects directory.

        Loads the projects stored in the users database, then creates a
        database entry for every sub directory of the "projects" directory
        (see :meth:`get_project_dirs`) whose name is not stored yet. Names
        are compared case-insensitively. The resulting list is kept in
        ``self.projects``.

        Returns:
            The updated ``self.projects`` list of ``ProjectSchema`` objects,
            or an empty list when no Timelink home is set.
        """
        if self.timelink_home is None:
            return []
        with self.users_db.session() as session:
            stored = session.scalars(
                select(Project).options(selectinload(Project.users))
            ).all()
            self.projects = [ProjectSchema.model_validate(proj) for proj in stored]
            known_names = {p.name.upper() for p in self.projects}

            created = []
            for pdir in self.get_project_dirs():
                key = pdir.upper()
                if key in known_names:
                    continue
                # todo: check if there is a project settings in the dir
                project = Project(name=pdir)
                session.add(project)
                created.append(project)
                # directories differing only in case map to a single project
                known_names.add(key)
            session.commit()

            # Convert the new rows while the session is still open, so that
            # self.projects holds only ProjectSchema objects (never ORM
            # instances that become detached when the session closes).
            self.projects.extend(
                ProjectSchema.model_validate(project) for project in created
            )
        return self.projects


    def print_info(self):
        """Print the :meth:`get_info` dictionary as indented JSON, then this object's repr."""
        rendered = json.dumps(self.get_info(), indent=4)
        print(rendered)
        print(repr(self))


[docs]
    def get_imported_files(self, data_frame=True, **kwargs):
        """Get the list of imported files in the database

        See the get_imported_files method in the TimelinkDatabase class:
        :meth:`timelink.api.database.TimelinkDatabase.get_imported_files`

        NOTE(review): this method reads ``self.db``, which is not assigned
        in the part of the class visible here -- confirm where it is set.

        Args:
            data_frame: if True, return a pandas DataFrame; otherwise,
                        return the objects produced by the database call
            **kwargs: extra arguments to pass to the get_imported_files method

        Returns:
            A DataFrame with one row per imported file (an empty DataFrame
            when there are none), with "nerrors" and "nwarnings" as nullable
            integers; or the raw list when ``data_frame`` is False.
        """
        ifiles = self.db.get_imported_files(**kwargs)
        if not data_frame:
            return ifiles
        if not ifiles:
            return pandas.DataFrame()

        ifiles_df = pandas.DataFrame([f.model_dump() for f in ifiles])
        # "Int64" (nullable) keeps missing counts as <NA> instead of
        # turning the whole column into floats. Each column is converted
        # from itself: warnings must not be overwritten with the errors.
        for count_column in ("nerrors", "nwarnings"):
            ifiles_df[count_column] = ifiles_df[count_column].astype("Int64")
        return ifiles_df



[docs]
    def update_from_sources(self, **kwargs):
        """Update the database from its sources.

        Thin pass-through: every keyword argument is handed unchanged to the
        ``update_from_sources`` method of ``self.db``; see
        :meth:`timelink.api.database.TimelinkDatabase.update_from_sources`.
        Nothing is returned.

        NOTE(review): ``self.db`` is not assigned in the part of the class
        visible here -- confirm where it is set.
        """
        database = self.db
        database.update_from_sources(**kwargs)



[docs]
    def get_import_status(self, data_frame=True, **kwargs):
        """Get the import status of Kleio Files

        NOTE(review): this method reads ``self.db``, which is not assigned
        in the part of the class visible here -- confirm where it is set.

        Args:
            data_frame: if True, return a pandas DataFrame; otherwise a list
                        of dictionaries (one ``model_dump()`` per file)
            **kwargs: extra arguments to pass to ``self.db.get_import_status``

        Returns:
            With ``data_frame`` True: a DataFrame with one row per file (an
            empty DataFrame when there are none) in which "status" and
            "import_status" hold the enum values, "import_errors" and
            "import_warnings" are nullable integers, and remaining missing
            values are filled with 0. Otherwise the list of dictionaries.
        """
        ifiles = [f.model_dump() for f in self.db.get_import_status(**kwargs)]
        if not data_frame:
            return ifiles
        if not ifiles:
            return pandas.DataFrame()

        ifiles_df = pandas.DataFrame(ifiles)
        # replace enum members by their value; anything without a ``value``
        # attribute (e.g. None or an already plain value) is kept as is
        for enum_column in ("status", "import_status"):
            ifiles_df[enum_column] = ifiles_df[enum_column].apply(
                lambda x: getattr(x, "value", x)
            )
        # nullable integers, see
        # https://stackoverflow.com/questions/21287624/convert-pandas-column-containing-nans-to-dtype-int
        # Each column is converted from itself: the warning counts must not
        # be overwritten with the error counts.
        for count_column in ("import_errors", "import_warnings"):
            ifiles_df[count_column] = ifiles_df[count_column].astype("Int64")
        return ifiles_df.fillna(0)



[docs]
    def get_sqlite_databases(self, sqlite_dir=None, **kwargs):
        """List the sqlite databases found in a directory.

        Delegates to the module-level ``get_sqlite_databases`` function
        imported from ``timelink.api.database``.

        Args:
            sqlite_dir: directory to look in; when None the instance's
                        ``sqlite_dir`` is used
            **kwargs: extra arguments to pass to the get_sqlite_databases function

        Returns:
            Whatever the underlying function returns for that directory
            (a list of sqlite databases).
        """
        target_dir = self.sqlite_dir if sqlite_dir is None else sqlite_dir
        return get_sqlite_databases(directory_path=target_dir, **kwargs)



[docs]
    def get_postgres_databases(self):
        """List the postgres databases.

        Delegates to ``get_postgres_dbnames`` imported from
        ``timelink.api.database``; no instance state is used.

        Returns:
            The result of ``get_postgres_dbnames()`` (a list of postgres
            database names).
        """
        db_names = get_postgres_dbnames()
        return db_names



[docs]
    def table_row_count_df(self):
        """Return the row count of every database table as a DataFrame.

        The rows come from ``self.db.table_row_count()`` and are labelled
        with the columns "table" and "count".

        NOTE(review): ``self.db`` is not assigned in the part of the class
        visible here -- confirm where it is set.
        """
        counts = self.db.table_row_count()
        return pandas.DataFrame(counts, columns=["table", "count"])



[docs]
    def get_file_paths(self, file_spec, rows, column):
        """Get the values of a column for selected rows of a DataFrame.

        TODO: #27 add parameter to convert the paths to absolute local paths

        Args:
            file_spec: a pandas DataFrame; any other value (a string
                       included) yields an empty list
            rows: positional row number, or a list-like of them (list,
                  tuple, range, array, ...)
            column: name of the column to read

        Returns:
            The values of ``column`` at the given row positions, as a list;
            an empty list when ``file_spec`` is not a DataFrame.

        Raises:
            ValueError: if ``column`` is not in the DataFrame or ``rows``
                is None.
        """
        if not isinstance(file_spec, pandas.DataFrame):
            return []
        if column not in file_spec.columns:
            raise ValueError(f"There is no {column} in the DataFrame")
        if rows is None:
            raise ValueError("The 'rows' argument must be present")
        # Only a scalar is wrapped; an exact-type check against ``list``
        # would wrap tuples, ranges and arrays into one bogus position.
        if pandas.api.types.is_list_like(rows):
            positions = list(rows)
        else:
            positions = [rows]
        return file_spec.iloc[positions][column].tolist()



[docs]
    def get_import_rpt(
        self, file_spec: pandas.DataFrame | str, rows=None, match_path=False, **kwargs
    ):
        """Show the import report for a given file specification

        Args:
            file_spec: file specification (DataFrame or string)
                       If a DataFrame, it should have the columns 'path'
                       and the arguments 'rows' must be present
            rows: if file_spec is a DataFrane, the row number to show
            match_path: if True, the path is used to retrieve the import report;
                        if false the filename is used (default).
            **kwargs: extra arguments to pass to the show_import_rpt method
                      in the TimelinkDatabase class

        """
        rpt = ""
        if match_path:
            column = "path"
        else:
            column = "name"
        if isinstance(file_spec, pandas.DataFrame):
            paths = self.get_file_paths(file_spec, rows, column)
            for file in paths:
                rpt += self.db.get_import_rpt(file, match_path=match_path, **kwargs)
        elif isinstance(file_spec, str):
            return self.db.get_import_rpt(file_spec, match_path=match_path, **kwargs)
        else:
            raise ValueError
        return rpt



[docs]
    def get_translation_report(self, file_spec, rows=None):
        """Get the translation report for a given file specification

        Args:
            file_spec: file specification (DataFrame or string).
                       If a DataFrame, it must have the column 'rpt_url'
                       and the argument 'rows' must be present.
                       If a string, it is passed directly to the Kleio
                       server's ``get_report``.
            rows: if file_spec is a DataFrame, the row number of interest,
                  or a non-empty list-like of row numbers

        Returns:
            The report text; for a DataFrame, the concatenation of the
            reports of all selected rows.

        Raises:
            ValueError: if ``file_spec`` is neither a DataFrame nor a
                string, or if it is a DataFrame and ``rows`` is missing
                or empty.
        """
        if isinstance(file_spec, str):
            return self.kleio_server.get_report(file_spec)
        if not isinstance(file_spec, pandas.DataFrame):
            raise ValueError(
                "file_spec must be a pandas DataFrame or a string, "
                f"not {type(file_spec).__name__}"
            )

        if rows is None:
            raise ValueError(
                "The 'rows' argument must be present "
                "if the file_spec is a DataFrame"
            )
        # Only a scalar is wrapped; an exact-type check against ``list``
        # would wrap tuples, ranges and arrays into one bogus position.
        rows = list(rows) if pandas.api.types.is_list_like(rows) else [rows]
        if not rows:
            raise ValueError(
                "The 'rows' argument must be a non-empty list, or an integer"
            )

        paths = self.get_file_paths(file_spec, rows, "rpt_url")
        return "".join(self.kleio_server.get_report(file) for file in paths)



[docs]
    def get_kleio_files(self, data_frame=True, **kwargs):
        """Get the list of files in the kleio server.

        Same as :meth:`get_import_status` but restricted to a subset of
        the columns. The full set produced by ``get_import_status`` looks
        like this (the subset kept here is listed in the code below):

            #   Column              Non-Null Count  Dtype
            ---  ------              --------------  -----
            0   path                3 non-null      object
            1   name                3 non-null      object
            2   size                3 non-null      int64
            3   directory           3 non-null      object
            4   modified            3 non-null      datetime64[ns, UTC]
            5   modified_iso        3 non-null      datetime64[ns, UTC]
            6   modified_string     3 non-null      object
            7   qtime               3 non-null      datetime64[ns, UTC]
            8   qtime_string        3 non-null      object
            9   source_url          3 non-null      object
            10  status              3 non-null      object
            11  translated          3 non-null      datetime64[ns, UTC]
            12  translated_string   3 non-null      object
            13  errors              3 non-null      int64
            14  warnings            3 non-null      int64
            15  version             3 non-null      object
            16  rpt_url             3 non-null      object
            17  xml_url             3 non-null      object
            18  import_status       3 non-null      object
            19  import_errors       3 non-null      Int64
            20  import_warnings     3 non-null      Int64
            21  import_error_rpt    3 non-null      object
            22  import_warning_rpt  3 non-null      object
            23  imported            3 non-null      int64
            24  imported_string     3 non-null      int64

        Args:
            data_frame: if True (default) return a pandas DataFrame;
                        otherwise a list of dictionaries limited to the
                        same keys
            **kwargs: extra arguments to pass to :meth:`get_import_status`

        Returns:
            A DataFrame with the selected columns (zero rows, but the same
            columns, when there are no files), or a list of dictionaries
            when ``data_frame`` is False.
        """
        columns = [
            "path",
            "name",
            "modified",
            "status",
            "translated",
            "errors",
            "warnings",
            "import_status",
            "import_errors",
            "import_warnings",
            "import_error_rpt",
            "import_warning_rpt",
            "imported",
            "rpt_url",
            "xml_url",
        ]
        if not data_frame:
            records = self.get_import_status(data_frame=False, **kwargs)
            return [
                {key: record[key] for key in columns if key in record}
                for record in records
            ]

        result = self.get_import_status(data_frame=True, **kwargs)
        if result.empty:
            # With no files get_import_status returns a frame without any
            # column; selecting by name would raise KeyError.
            return result.reindex(columns=columns)
        return result[columns]