""" Main class for the Timelink web application. """
import os
import json
from typing import List
import pandas
from sqlalchemy import select
from sqlalchemy.orm import selectinload
from sqlalchemy.engine.url import make_url
import timelink
from timelink.api.database import get_postgres_dbnames, get_sqlite_databases
from timelink.app.schemas.project import ProjectSchema
from timelink.kleio.kleio_server import KleioServer
from timelink.app.models import UserDatabase, User, UserProperty # noqa
from timelink.app.models.project import Project
[docs]
class TimelinkWebApp:
    """A class to interact with the Timelink system
    from a FastAPI web application.

    It stores TimelinkDatabase, KleioServer objects
    and Fief (user management) objects.

    Attributes:
        app_name (str): Name of the application.
        timelink_home (str): Directory where the Timelink database is located.
        host_url (str): URL of the Timelink web application.
        kleio_server (KleioServer): A KleioServer instance.
        users_db_type (str): Type of the users database (sqlite or postgres).
        users_db_name (str): Name of the users database.
        users_db (UserDatabase): A UserDatabase instance.
        auth_manager (str): URL of the authentication manager.
        app_manager (str): URL of the application manager.
        kleio_image (str): Name of the Kleio image to use.
        postgres_image (str): Name of the postgres image to use.
        postgres_version (str): Version of the postgres image to use.
        sqlite_dir (str): Directory where the sqlite databases are located.
        stop_duplicates (bool): If True, stop other kleio servers for the same timelink home.
    """

    # The four URLs below are class-level defaults; they stay None until
    # something outside this class assigns them.

    # this should be set in a Dependency
    after_auth_url = None
    # Url in fief to authenticate
    # must be set with fief.auth_url(redirect_uri="http://localhost:8000")
    # see https://fief-dev.github.io/fief-python/fief_client.html#Fief.auth_url
    auth_url = None
    after_logout_url = None
    # Url in fief to logout
    # must be set with fief.logout_url(redirect_uri="http://localhost:8000")
    # see https://fief-dev.github.io/fief-python/fief_client.html#Fief.logout_url
    logout_url = None
def __init__(
    self,
    app_name: str = "timelink",
    timelink_url: str = "http://localhost:8008",
    timelink_home: str = None,
    kleio_server: KleioServer = None,
    users_db_type: str = "sqlite",
    users_db_name: str = "timelink_users.sqlite",
    kleio_image=None,
    kleio_version=None,
    kleio_token=None,
    kleio_update=False,
    postgres_image=None,
    postgres_version=None,
    sqlite_dir=None,
    stop_duplicates=True,  # kleio server duplicates
    initial_users: list[User] = None,
    **connection_args,
):
    """Create a TimelinkWebApp instance.

    Opens the users database, starts a Kleio server when none is
    supplied, and loads the project list.

    Args:
        app_name: name of the application
        timelink_url: URL of the Timelink web application
        timelink_home: directory where the Timelink database is located;
            when None it is looked up with
            ``KleioServer.find_local_kleio_home()``
        kleio_server: a KleioServer instance; when given, no server is
            started
        users_db_type: type of the users database (sqlite or postgres)
        users_db_name: name of the users database
        kleio_image: name of the Kleio image to use
        kleio_version: version of the Kleio image to use
        kleio_token: token to access the Kleio server
        kleio_update: if True, update the Kleio server
        postgres_image: name of the postgres image to use
        postgres_version: version of the postgres image to use
        sqlite_dir: directory where the sqlite databases are located;
            defaults to ``<timelink_home>/system/db/sqlite``
        stop_duplicates: if True, stop duplicates
        initial_users: list of initial users (deprecated)
        **connection_args: extra arguments passed to the UserDatabase
            constructor

    Raises:
        ValueError: if ``users_db_type`` is neither "sqlite" nor "postgres".
    """
    self.app_name = app_name
    self.timelink_home = timelink_home
    self.host_url = timelink_url
    self.kleio_server = kleio_server
    self.kleio_version = kleio_version
    self.users_db_type = users_db_type
    self.users_db_name = users_db_name
    self.users_db = None
    self.kleio_image = kleio_image
    self.postgres_image = postgres_image
    self.postgres_version = postgres_version
    self.sqlite_dir = sqlite_dir
    self.stop_duplicates = stop_duplicates
    # deprecated
    self.initial_users = [] if initial_users is None else initial_users
    self.kleio_token = kleio_token
    self.kleio_update = kleio_update
    self.projects: List[ProjectSchema] = []

    if self.timelink_home is None:
        self.timelink_home = KleioServer.find_local_kleio_home()

    if self.users_db_type == "sqlite":
        if self.sqlite_dir is None:
            self.sqlite_dir = os.path.join(self.timelink_home, "system/db/sqlite")
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.sqlite_dir, exist_ok=True)
        self.users_db = UserDatabase(
            db_type=self.users_db_type,
            db_name=self.users_db_name,
            db_path=self.sqlite_dir,
            stop_duplicates=self.stop_duplicates,
            initial_users=self.initial_users,
            **connection_args,
        )
    elif self.users_db_type == "postgres":
        self.users_db = UserDatabase(
            db_type=self.users_db_type,
            db_name=self.users_db_name,
            postgres_image=self.postgres_image,
            postgres_version=self.postgres_version,
            stop_duplicates=self.stop_duplicates,
            initial_users=self.initial_users,
            **connection_args,
        )
    else:
        raise ValueError(f"Invalid database type: {self.users_db_type}")

    # Only start a Kleio server when the caller did not hand one in and
    # there is a home directory to serve.
    if self.kleio_server is None and self.timelink_home is not None:
        self.kleio_server = KleioServer.start(
            kleio_home=self.timelink_home,
            kleio_image=self.kleio_image,
            kleio_version=self.kleio_version,
            kleio_admin_token=self.kleio_token,
            update=self.kleio_update,
            stop_duplicates=self.stop_duplicates,
        )

    self.update_projects()
[docs]
def get_info(self, show_token=False, show_password=False):
    """Return a dictionary describing this Timelink web app.

    Args:
        show_token: if False (default) only the first five characters of
            the Kleio server token are included, followed by "...".
        show_password: if False (default) the password in the users
            database URL is hidden and the Postgres password is
            reported as "...".

    Returns:
        A dict mapping human-readable labels to values.
    """
    # SQLAlchemy URL objects are immutable, so the password cannot be
    # overwritten in place; let the URL render itself with or without it.
    db_url = self.users_db.engine.url.render_as_string(
        hide_password=not show_password
    )
    kserver = self.kleio_server
    info_dict = {
        "Timelink version": timelink.version,
        "Timelink home": self.timelink_home,
        "Timelink host URL": self.host_url,
        "Timelink users database": db_url,
        # the server may be absent; do not dereference it unguarded
        "Kleio server": kserver.get_url() if kserver is not None else None,
        "Kleio version requested": self.kleio_version,
        "SQLite directory": self.sqlite_dir,
        "Postgres image": self.postgres_image,
        "Postgres version": self.postgres_version,
    }

    if kserver is not None:
        token = kserver.get_token()
        if not show_token:
            token = token[:5] + "..."
        info_dict.update(
            {
                "Kleio server token": token,
                "Kleio server URL": kserver.get_url(),
                "Kleio server home": kserver.get_kleio_home(),
            }
        )
        container = kserver.container
        if container is not None:
            info_dict["Kleio server container"] = container.name
            labels = container.labels
            build = labels.get("BUILD", "")
            version = labels.get("VERSION", "")
            build_date = labels.get("BUILD_DATE", "")
            if version != "":
                info_dict["Kleio server version"] = (
                    f"{version}.{build} ({build_date})"
                )

    if self.users_db_type == "postgres":
        # NOTE(review): `self.db` is not assigned anywhere in the visible
        # part of this class -- confirm it is attached before this branch
        # runs, or whether `self.users_db` was intended.
        info_dict.update(
            {
                "Postgres user": self.db.db_user,
                "Postgres password": self.db.db_pwd if show_password else "...",
            }
        )
    return info_dict
[docs]
def get_project_dirs(self):
    """Return the names of the project directories.

    Projects are the sub directories of ``<timelink_home>/projects``.
    Plain files in that directory are ignored; when the directory
    does not exist the result is an empty list.
    """
    root = os.path.join(self.timelink_home, "projects")
    if not os.path.exists(root):
        return []
    found = []
    for entry in os.listdir(root):
        if os.path.isdir(os.path.join(root, entry)):
            found.append(entry)
    return found
[docs]
def update_projects(self) -> "List[ProjectSchema]":
    """Synchronise the project list with the projects directory.

    Loads the projects stored in the users database, then adds a
    database entry for every sub directory of the "projects" directory
    in the Timelink home whose name (compared case-insensitively) is
    not yet known.

    Returns:
        The refreshed ``self.projects`` list of ProjectSchema objects,
        or an empty list when no Timelink home is configured.
    """
    if self.timelink_home is None:
        return []

    with self.users_db.session() as session:
        stored = session.scalars(
            select(Project).options(selectinload(Project.users))
        ).all()
        self.projects = [ProjectSchema.model_validate(proj) for proj in stored]
        known_names = {p.name.upper() for p in self.projects}

        new_projects = []
        for pdir in self.get_project_dirs():
            key = pdir.upper()
            if key in known_names:
                continue
            # remember the name so two directories differing only by
            # case are not both added in the same pass
            known_names.add(key)
            # todo: check if there is a project settings in the dir
            project = Project(name=pdir)
            session.add(project)
            new_projects.append(project)

        if new_projects:
            session.commit()
            # Convert while the session is still open so self.projects
            # holds only ProjectSchema objects, never ORM instances tied
            # to a closed session.
            self.projects.extend(
                ProjectSchema.model_validate(project) for project in new_projects
            )
    return self.projects
def print_info(self):
    """Print the result of get_info() as indented JSON, then this object's repr."""
    rendered = json.dumps(self.get_info(), indent=4)
    print(rendered)
    print(repr(self))
[docs]
def get_imported_files(self, data_frame=True, **kwargs):
    """Get the list of imported files in the database.

    See the get_imported_files method in the TimelinkDatabase class:
    :meth:`timelink.api.database.TimelinkDatabase.get_imported_files`

    Args:
        data_frame: if True, return a pandas DataFrame; otherwise,
            return the list produced by the database unchanged
        **kwargs: extra arguments to pass to the get_imported_files method

    Returns:
        A DataFrame with one row per imported file (empty when there are
        none), with "nerrors" and "nwarnings" as nullable integers, or
        the raw list when ``data_frame`` is False.
    """
    ifiles = self.db.get_imported_files(**kwargs)
    if not data_frame:
        return ifiles
    if len(ifiles) == 0:
        return pandas.DataFrame()

    ifiles_df = pandas.DataFrame([f.model_dump() for f in ifiles])
    # Nullable "Int64" keeps missing counts as <NA> instead of forcing
    # the column to float. Each column is converted from its own data.
    for column in ("nerrors", "nwarnings"):
        ifiles_df[column] = ifiles_df[column].astype("Int64")
    return ifiles_df
[docs]
def update_from_sources(self, **kwargs):
    """Update the database from a list of sources.

    Thin pass-through: every keyword argument is forwarded to
    :meth:`timelink.api.database.TimelinkDatabase.update_from_sources`
    and nothing is returned.
    """
    database = self.db
    database.update_from_sources(**kwargs)
[docs]
def get_import_status(self, data_frame=True, **kwargs):
    """Get the import status of Kleio files.

    Args:
        data_frame: if True, return a pandas DataFrame; otherwise
            return a list of dictionaries (one ``model_dump()`` per file)
        **kwargs: extra arguments passed to the database's
            get_import_status method

    Returns:
        A DataFrame with one row per file (empty when there are none).
        The "status" and "import_status" columns hold the enum values,
        "import_errors" and "import_warnings" are nullable integers, and
        remaining missing values are replaced by 0. When ``data_frame``
        is False, the list of dictionaries is returned instead.
    """
    ifiles = [f.model_dump() for f in self.db.get_import_status(**kwargs)]
    if not data_frame:
        return ifiles
    if not ifiles:
        return pandas.DataFrame()

    ifiles_df = pandas.DataFrame(ifiles)
    # replace the enum members by their plain values
    for column in ("status", "import_status"):
        ifiles_df[column] = ifiles_df[column].apply(lambda x: x.value)
    # Nullable integers let columns containing missing counts be integer;
    # see https://stackoverflow.com/questions/21287624/convert-pandas-column-containing-nans-to-dtype-int
    # Each column is converted from its own data.
    for column in ("import_errors", "import_warnings"):
        ifiles_df[column] = ifiles_df[column].astype("Int64")
    return ifiles_df.fillna(0)
[docs]
def get_sqlite_databases(self, sqlite_dir=None, **kwargs):
    """List the sqlite databases found in a directory.

    Args:
        sqlite_dir: directory to look in; when None, this object's
            ``sqlite_dir`` is used
        **kwargs: extra arguments to pass to the get_sqlite_databases function

    Returns:
        Whatever the module-level get_sqlite_databases function returns
        for that directory.
    """
    directory = self.sqlite_dir if sqlite_dir is None else sqlite_dir
    return get_sqlite_databases(directory_path=directory, **kwargs)
[docs]
def get_postgres_databases(self):
    """List the postgres databases.

    Returns:
        The result of ``get_postgres_dbnames()``, unchanged.
    """
    dbnames = get_postgres_dbnames()
    return dbnames
[docs]
def table_row_count_df(self):
    """Return the row count of all tables in the database as a DataFrame.

    The frame has the two columns "table" and "count", filled from the
    database's ``table_row_count()`` result.
    """
    return pandas.DataFrame(self.db.table_row_count(), columns=["table", "count"])
[docs]
def get_file_paths(self, file_spec, rows, column):
    """Pick the values of one column for the given rows of a DataFrame.

    TODO: #27 add parameter to convert the paths to absolute local paths

    Args:
        file_spec: a DataFrame holding file information; any other type
            yields an empty list
        rows: a single row position, or a list/tuple/range of positions
            (positional, as used by ``DataFrame.iloc``)
        column: name of the column to read

    Returns:
        The list of values of ``column`` at the requested rows, or an
        empty list when ``file_spec`` is not a DataFrame.

    Raises:
        ValueError: if ``column`` is not in the DataFrame or ``rows`` is None.
    """
    if not isinstance(file_spec, pandas.DataFrame):
        return []
    if column not in file_spec.columns:
        raise ValueError(f"There is no {column} in the DataFrame")
    if rows is None:
        raise ValueError("The 'rows' argument must be present")
    # Accept any plain sequence of positions; wrap a single position.
    if isinstance(rows, (list, tuple, range)):
        positions = list(rows)
    else:
        positions = [rows]
    return file_spec.iloc[positions][column].tolist()
[docs]
def get_import_rpt(
self, file_spec: pandas.DataFrame | str, rows=None, match_path=False, **kwargs
):
"""Show the import report for a given file specification
Args:
file_spec: file specification (DataFrame or string)
If a DataFrame, it should have the columns 'path'
and the arguments 'rows' must be present
rows: if file_spec is a DataFrane, the row number to show
match_path: if True, the path is used to retrieve the import report;
if false the filename is used (default).
**kwargs: extra arguments to pass to the show_import_rpt method
in the TimelinkDatabase class
"""
rpt = ""
if match_path:
column = "path"
else:
column = "name"
if isinstance(file_spec, pandas.DataFrame):
paths = self.get_file_paths(file_spec, rows, column)
for file in paths:
rpt += self.db.get_import_rpt(file, match_path=match_path, **kwargs)
elif isinstance(file_spec, str):
return self.db.get_import_rpt(file_spec, match_path=match_path, **kwargs)
else:
raise ValueError
return rpt
[docs]
def get_translation_report(self, file_spec, rows=None):
    """Return the translation report for a given file specification.

    Args:
        file_spec: file specification (DataFrame or string).
            If a DataFrame, it should have the column 'rpt_url'
            and the argument 'rows' must be present
        rows: if file_spec is a DataFrame, the row number (or list of
            row numbers) of interest

    Returns:
        The report fetched from the Kleio server for a string
        file_spec, or the concatenation of the reports of the selected
        rows for a DataFrame.

    Raises:
        ValueError: if file_spec is neither a DataFrame nor a string, or
            if it is a DataFrame and 'rows' is None or an empty list.
    """
    if isinstance(file_spec, str):
        return self.kleio_server.get_report(file_spec)
    if not isinstance(file_spec, pandas.DataFrame):
        raise ValueError

    if rows is None:
        raise ValueError(
            "The 'rows' argument must be present if the file_spec is a DataFrame"
        )
    if type(rows) is not list:
        # a single row number was given
        rows = [rows]
    if len(rows) == 0:
        raise ValueError(
            "The 'rows' argument must be a non-empty list, or an integer"
        )

    report_urls = self.get_file_paths(file_spec, rows, "rpt_url")
    return "".join(self.kleio_server.get_report(url) for url in report_urls)
[docs]
def get_kleio_files(self, data_frame=True, **kwargs):
    """Get the list of files in the kleio server.

    Wrapper around this object's get_import_status method that keeps
    only a subset of the columns: path, name, modified, status,
    translated, errors, warnings, import_status, import_errors,
    import_warnings, import_error_rpt, import_warning_rpt, imported,
    rpt_url and xml_url.

    For reference, a sample of the full set of columns produced by
    get_import_status:

         #   Column              Non-Null Count  Dtype
        ---  ------              --------------  -----
         0   path                3 non-null      object
         1   name                3 non-null      object
         2   size                3 non-null      int64
         3   directory           3 non-null      object
         4   modified            3 non-null      datetime64[ns, UTC]
         5   modified_iso        3 non-null      datetime64[ns, UTC]
         6   modified_string     3 non-null      object
         7   qtime               3 non-null      datetime64[ns, UTC]
         8   qtime_string        3 non-null      object
         9   source_url          3 non-null      object
         10  status              3 non-null      object
         11  translated          3 non-null      datetime64[ns, UTC]
         12  translated_string   3 non-null      object
         13  errors              3 non-null      int64
         14  warnings            3 non-null      int64
         15  version             3 non-null      object
         16  rpt_url             3 non-null      object
         17  xml_url             3 non-null      object
         18  import_status       3 non-null      object
         19  import_errors       3 non-null      Int64
         20  import_warnings     3 non-null      Int64
         21  import_error_rpt    3 non-null      object
         22  import_warning_rpt  3 non-null      object
         23  imported            3 non-null      int64
         24  imported_string     3 non-null      int64

    Args:
        data_frame: if True (default) return a pandas DataFrame;
            otherwise return a list of dictionaries restricted to the
            same keys
        **kwargs: extra arguments passed to get_import_status

    Returns:
        A DataFrame with the columns listed above (with no rows when
        there are no files), or a list of dictionaries when
        ``data_frame`` is False.
    """
    columns = [
        "path",
        "name",
        "modified",
        "status",
        "translated",
        "errors",
        "warnings",
        "import_status",
        "import_errors",
        "import_warnings",
        "import_error_rpt",
        "import_warning_rpt",
        "imported",
        "rpt_url",
        "xml_url",
    ]
    if not data_frame:
        records = self.get_import_status(data_frame=False, **kwargs)
        return [{column: record[column] for column in columns} for record in records]

    result = self.get_import_status(**kwargs)
    if result.empty:
        # get_import_status returns a frame without columns when there
        # are no files; selecting columns from it would raise KeyError.
        return pandas.DataFrame(columns=columns)
    return result[columns]