Source code for timelink.kleio.utilities

""".. module:: utilities
   :synopsis: Various utilities for handling Kleio groups and elements

.. moduleauthor:: Joaquim Ramos de Carvalho

Kleio Groups are the building blocks for transcription of historical sources.
"""

from datetime import datetime
import json
import textwrap
from os import linesep as nl
import warnings
from decimal import Decimal


def kleio_escape(v: str) -> str:
    """
    Quote a value when it contains Kleio special characters.

    The special characters are ``/ ; = $ # %``, the newline and the double
    quote. A value containing any of them is wrapped in double quotes, or in
    triple double quotes when the value itself contains a double quote::

        >>> print(kleio_escape('normal string'))
        normal string
        >>> print(kleio_escape('oops we have a / in the middle'))
        "oops we have a / in the middle"

    :param v: The value to escape. Non-string values are converted with
        :py:func:`str` before being checked.
    :type v: str
    :return: The value, quoted if needed; ``None`` when ``v`` is ``None``.
    :rtype: str
    """
    if v is None:
        return None

    # Convert before any string method is used, so that numbers and other
    # non-string values are escaped instead of failing on ``.strip()``.
    text = str(v)

    # A value whose first non-blank character is a double quote is taken to
    # be quoted already and is returned untouched.
    if text.strip().startswith('"'):
        return text

    special_chars = "/;=$#%\n\""
    if not any(ch in text for ch in special_chars):
        return text

    # A double quote inside the value requires triple quotes around it.
    delimiter = '"""' if '"' in text else '"'
    return delimiter + text + delimiter
def quote_long_text(txt, initial_indent=" " * 4, indent=" " * 2, width=2048, **kwargs) -> str:
    """Surround long text with triple quotes, wrapping and indenting as needed.

    Text that is longer than ``width`` or spans more than one line is wrapped
    line by line with :py:func:`textwrap.fill`, indented with
    :py:func:`textwrap.indent` and enclosed in triple quotes. Shorter text
    that contains a double quote is enclosed in triple quotes as is. Any
    other text is handed to :py:func:`kleio_escape`. Text that already
    starts with triple quotes is returned unchanged.

    :param txt: The text to be transformed. Values that are not ``str`` are
        converted with :py:func:`str`; ``None`` is returned as ``None``.
    :type txt: str
    :param initial_indent: string used to indent the first line of each
        wrapped paragraph. Default is 4 spaces.
        See :py:func:`textwrap.fill`.
    :type initial_indent: str
    :param indent: prefix added to the wrapped lines by
        :py:func:`textwrap.indent`, and placed before the closing quotes.
        Default is 2 spaces.
    :type indent: str
    :param width: width of line for wrapping; ``None`` means 80.
        See :py:func:`textwrap.fill`.
    :type width: int
    :param kwargs: accepted so callers can forward extra options; not used
        by this function.
    :rtype: str
    """
    if txt is None:
        return None

    line_width = 80 if width is None else width
    text = txt if type(txt) is str else str(txt)
    triple = '"""'

    # Already triple quoted: leave it alone.
    if text.strip().startswith(triple):
        return text

    lines = text.splitlines()
    if len(text) > line_width or len(lines) > 1:
        # Wrap every source line on its own so existing line breaks survive.
        wrapped = [
            textwrap.indent(
                textwrap.fill(line, width=line_width, initial_indent=initial_indent),
                indent,
            )
            + nl
            for line in lines
        ]
        return triple + "".join(wrapped) + indent + triple

    if '"' in text:
        return triple + text + triple

    return kleio_escape(text)
def get_extra_info_from_obs(obs_text: str) -> tuple[str, dict]:
    """
    Split an observation string into its text and its extra information.

    The extra information is the JSON document that follows the first
    ``extra_info:`` marker in the string. Everything before the marker is
    returned as the cleaned text.

    :param obs_text: The string with extra information (can have other text
        before the marker)
    :type obs_text: str
    :return: A tuple with the text before the marker (stripped) and the
        decoded extra information. When there is no marker the text is
        returned unchanged with an empty dictionary; ``None`` gives
        ``("", {})``.
    :rtype: tuple[str,dict]
    :raises json.JSONDecodeError: if the text after the marker is not empty
        and is not valid JSON.
    """
    if obs_text is None:
        return "", {}

    marker = "extra_info:"
    if marker not in obs_text:
        return obs_text, {}

    # Split on the first marker only: the JSON payload may itself contain
    # the marker text (for instance inside a comment) and must stay whole.
    text, _, payload = obs_text.partition(marker)
    payload = payload.strip()
    extra_info_dict = json.loads(payload) if payload else {}
    return text.strip(), extra_info_dict
def render_with_extra_info(element_name, element_value, extra_info, **kwargs) -> str:
    """
    Render the value of a Kleio element together with its extra information.

    The entry for ``element_name`` is looked up in ``extra_info``. Its
    ``comment`` is appended to the value after a ``#`` and its ``original``
    after a ``%``, each one passed through :py:func:`quote_long_text`.

    :param element_name: The name of the element
    :type element_name: str
    :param element_value: The value of the element from the db. A value that
        is already a ``str`` is used as is; any other value goes through
        :py:func:`quote_long_text` first.
    :type element_value: str
    :param extra_info: The extra information dictionary
    :type extra_info: dict, or str (will be parsed by
        :py:func:`get_extra_info_from_obs`); any other type is treated as
        an empty dictionary
    :param kwargs: Additional parameters for :py:func:`quote_long_text`
    :rtype: str
    """
    if type(extra_info) is str:
        # A raw observation string: keep only the decoded dictionary.
        _, extra_info = get_extra_info_from_obs(extra_info)
    elif type(extra_info) is not dict:
        extra_info = {}

    rendered = element_value
    if type(rendered) is not str:
        rendered = quote_long_text(rendered, **kwargs)

    element_extras = extra_info.get(element_name, {})
    # Comment first, then original wording, matching the Kleio notation.
    for key, separator in (("comment", "#"), ("original", "%")):
        extra_text = element_extras.get(key, None)
        if extra_text is not None:
            rendered = f"{rendered}{separator}{quote_long_text(extra_text, **kwargs)}"

    return rendered
def _reverse_date_value(data_str: str): """ reverses the sort value of a date: note that it is possible to infer the date type by computing type=value-round(value) type = 0 -> single date round(value) type = 0.3 -> after date round(value) type = -0.3 -> date before round(value) type = 0.1 -> open ended range starting at round(value) type = -0.1 > open start rand ending at round(value) other = range from round(value) to value-round(value) """ value = Decimal(data_str) round_value = round(value) type = value - round_value if type == Decimal("0"): return data_str elif type == Decimal("0.3"): return ">" + str(round_value) elif type == Decimal("-0.3"): return "<" + str(round_value) elif type == Decimal("0.1"): return str(round_value) + ":" elif type == Decimal("-0.1"): return ":" + str(round_value) else: # range dates = data_str.split('.') return dates[0] + ":" + dates[1]