Source code for timelink.kleio.groups

"""
.. module:: groups
   :synopsis: Classes for handling Kleio groups and elements.

.. moduleauthor:: Joaquim Ramos de Carvalho

Kleio Groups are the building blocks for transcription of historical sources.

"""
import textwrap
from os import linesep as nl
from typing import Any, Union, Type, Tuple

from box import Box

from timelink.kleio.utilities import quote_long_text


class KElement:
    """
    Represents an Element in the Kleio language.

    While *Groups* represent historical entities (people, objects, events),
    *Elements* encapsulate basic items of information (name, gender, date).

    The value of an Element can have three possible "aspects":

    1) "core": the actual information for the element
    2) "original" (optional): the original wording, when relevant
    3) "comment" (optional): a comment on the value

    Example in Kleio notation::

        person$Joaquim Carvalho%Joachim Carvº#Family name added in the margin

    Can be generated by::

        n = KElement('name', 'Joaquim Carvalho', original='Joachim Carvº',
                     comment='Family name added in the margin')
        id = KElement('id', 'p-jrc')
        person = KPerson(id=id, name=n)
    """

    name: str
    core: Any  # must have a str representation.
    comment: str = None
    original: str = None
    _source: str = None

    def __init__(self, name: str, val: Any, comment=None, original=None):
        """
        Args:
            name: name of the Element. A string.
            val: the core aspect of the Element. Must have __str__, or be
                a tuple (core, comment, original). When a 3-tuple is given
                the optional arguments are disregarded.
            comment: Optional; the comment aspect of the Element.
            original: Optional; the original aspect of the Element.
        """
        self.name = name

        # A plain 3-tuple carries all the aspects at once.
        if type(val) is tuple and len(val) == 3:
            self.core, self.comment, self.original = val
            return

        self.core = val
        # Aspects left as None keep the class-level default (None).
        if comment is not None:
            self.comment = comment
        if original is not None:
            self.original = original

    def __str__(self):
        """Return the aspects as ``core#comment%original``, skipping None."""
        marked_aspects = (
            ('', self.core),
            ('#', self.comment),
            ('%', self.original),
        )
        return ''.join(
            marker + quote_long_text(str(aspect))
            for marker, aspect in marked_aspects
            if aspect is not None
        )

    def is_empty(self):
        """True if all aspects of the element are None or empty string"""
        aspects = (self.core, self.comment, self.original)
        return all(a is None or a == '' for a in aspects)

    def to_tuple(self):
        """Return Element as a tuple (core, comment, original)"""
        return (self.core, self.comment, self.original)

    def to_kleio(self):
        """Return element as a kleio string: element=core#comment%original"""
        return f'{self.name}={self}'

    def to_dict(self, name=False):
        """
        Return Element as a dict {core:_, comment:_, original:_}

        Pass name=True to add the name to the dictionary:
        {name:_, core:_, comment:_, original:_}
        """
        aspects = {
            'core': self.core,
            'comment': self.comment,
            'original': self.original,
        }
        if not name:
            return aspects
        # The name goes first, followed by the three aspects.
        return {'name': self.name, **aspects}

    def to_dots(self):
        """Return the aspects dict (without name) wrapped in a Box."""
        return Box(self.to_dict())
class KGroup:
    """
    KGroup(*positional_elements, **more_elements)

    Abstract Kleio Group.

    To define a Kleio Group extend this class and set a default value for
    ``_name``. Define the allowed elements in the default values of
    ``_position``, ``_guaranteed`` and ``_also`` (lists of strings).
    Use ``_part`` to list the allowed enclosed groups.

    For an example see timelink.kleio.groups.KPerson
    """

    id: str = '*id*'
    _name: str = 'kgroup'
    _position: list = []
    _guaranteed: list = []
    _also: list = []
    _part: list = []
    _extends: Type['KGroup']

    # TODO to_kleio_str generates the definition of a group
    #      for a kleio str file. recurse=yes
    #      collects included groups and generates for those also.

    @property
    def kname(self):
        """Name of the group, as stored in ``_name``."""
        return self._name

    @staticmethod
    def _as_list(value, inherited: list) -> list:
        """Return `value` as a fresh list, or a copy of `inherited` if None.

        A single string or class is wrapped in a one-item list, so the
        result can always be concatenated with the other element lists.
        """
        if value is None:
            return list(inherited)
        if isinstance(value, (str, type)):
            return [value]
        return list(value)

    @classmethod
    def extend(
            cls,
            name: str,
            position: Union[list, str, None] = None,
            guaranteed: Union[list, str, None] = None,
            also: Union[list, str, None] = None,
            part: Union[list, str, None] = None):
        """
        Create a new group extending this one::

            fonte = KGroup.extend('fonte',
                                  also=['tipo', 'data', 'ano',
                                        'obs', 'substitui'])

        Args:
            name: kname of the new group (also used as its class name).
            position: positional elements; a list, or a single name.
            guaranteed: compulsory elements; a list, or a single name.
            also: optional elements; a list, or a single name.
            part: groups allowed inside the new group; a list of group
                names and/or KGroup subclasses, or a single one.

        Any argument left as None is inherited (copied) from this class.

        Returns:
            The new class, with ``_extends`` set to this class.
        """
        new_group = type(name, (cls,), {})
        new_group._name = name
        # todo: k,v in kwargs if in cls set if not error
        # Every list is copied so that the new group never shares a
        # mutable list with its parent or with the caller.
        new_group._position = cls._as_list(position, cls._position)
        new_group._guaranteed = cls._as_list(guaranteed, cls._guaranteed)
        new_group._also = cls._as_list(also, cls._also)
        new_group._part = cls._as_list(part, cls._part)
        new_group._extends = cls
        return new_group

    @classmethod
    def get_subclasses(cls):
        """Yield every direct and indirect subclass, deepest ones first."""
        for subclass in cls.__subclasses__():
            yield from subclass.get_subclasses()
            yield subclass

    @classmethod
    def all_subclasses(cls):
        """Return the subclasses yielded by get_subclasses() as a list."""
        return list(cls.get_subclasses())

    @classmethod
    def is_kgroup(cls, g):
        """True if g is an instance of KGroup or of a subclass of KGroup.

        The check looks for a class named 'KGroup' in the MRO of type(g).
        """
        return any(c.__name__ == 'KGroup' for c in type(g).mro())

    @classmethod
    def elements(cls) -> set:
        """Set of Elements allowed in this Group"""
        return set(cls._guaranteed).union(cls._also, cls._position)

    @classmethod
    def allow_as_part(cls, g: Union[str, type]):
        """
        Allow g to be enclosed as part of this group.

        Arguments:
            g: the name of a group, or a subclass of KGroup.
               A string or class.
        """
        # A class that merely inherits `_part` would otherwise append to
        # the list of its parent, silently changing every other class
        # that shares it; give the class its own copy first.
        if '_part' not in cls.__dict__:
            cls._part = list(cls._part)
        if g not in cls._part:
            cls._part.append(g)

    def _set_element(self, name: str, value):
        """Store `value` as element `name`, wrapping it in a KElement."""
        if isinstance(value, KElement):
            element = value
            element.name = name  # the element takes the name it is stored as
        else:
            element = KElement(name, value)
        setattr(self, name, element)

    def __init__(self, *args, **kwargs):
        """
        Args:
            *args: values for the elements listed in ``_position``,
                in that order.
            **kwargs: values for elements given by name; each name must
                be in ``_position``, ``_guaranteed`` or ``_also``.

        Values that are not KElement instances are wrapped in one.

        Raises:
            ValueError: too many positional values, or an element name
                that is not allowed in this group.
            TypeError: an element listed in ``_guaranteed`` is missing.
        """
        self._containsd: dict = {}
        if len(args) > len(self._position):
            raise ValueError('Too many positional elements')
        # set the positional arguments according to "_position"
        for name, value in zip(self._position, args):
            self._set_element(name, value)
        # keyword arguments must be in one of the element lists;
        # __setitem__ validates the name and stores the element
        for name, value in kwargs.items():
            self[name] = value
        # test if the compulsory (guaranteed) elements are present
        for g in self._guaranteed:
            if getattr(self, g, None) is None:
                raise TypeError(
                    f'Element {g} in _guaranteed '
                    f'is missing or with None value')

    def include(self, group: 'KGroup'):
        """
        Include a group.

        `group`, or its class, must be in the ``_part`` list, or its class
        must extend a class in the ``_part`` list.

        Included groups are stored in a dict of lists keyed by the
        ``_part`` entry (kname or class) that allowed them.

        TODO use an ordered dict for included groups, created at first
        include with one empty list per member of ``_part``.

        Returns self so it is possible to chain:
        g.include(g2).include(g3)

        Raises:
            TypeError: `group` is not a KGroup.
            ValueError: `group` is not allowed in this group.
        """
        allowed = self.is_allowed_as_part(group)
        if allowed is None:
            raise ValueError(
                f'Group {self.kname} cannot contain {group.kname}')
        self._containsd.setdefault(allowed, []).append(group)
        return self

    def is_allowed_as_part(self, group):
        """
        Test if a group can be included in the current one.

        A group is allowed when one of these conditions holds:

        1. the kname of the group is in self._part
        2. the type of the group is in self._part
        3. the type of the group inherits from a type in self._part

        Returns the key under which the group is allowed (kname, type, or
        super type), or None if it is not allowed. When several classes
        of self._part match, the most specific one (the first in the
        MRO of the group's type) is returned.

        Raises:
            TypeError: `group` is not a KGroup.
        """
        if not self.is_kgroup(group):
            raise TypeError("Argument must be subclass of KGroup")

        if group.kname in self._part:
            return group.kname

        allowed_classes = [c for c in self._part if not isinstance(c, str)]
        # Walk the MRO in order so the result is deterministic.
        for klass in type(group).mro():
            if klass in allowed_classes:
                return klass
        return None

    def includes(
            self,
            group: Union[str, Type['KGroup'], None] = None) -> list:
        """Returns included groups.

        With no argument, groups are returned by the order in _part.

        TODO this would better be a generator, yield instead of extend

        :param group: filter by group name (or class). When `group` is
            one of the keys used for storage its list is returned
            directly; otherwise the groups stored under class keys are
            searched for those whose kname is `group` or that are
            instances of `group`.
        """
        if group is None:
            # no specific subgroup, we return by part order
            inc_by_part_order = []
            for p in self._part:
                if p in self._containsd:
                    inc_by_part_order.extend(self._containsd[p])
            return inc_by_part_order

        if group in self._containsd:
            return self._containsd[group]

        wants_class = isinstance(group, type)
        matches = []
        for key, members in self._containsd.items():
            if not hasattr(key, 'kname'):
                continue  # string keys can only match directly (above)
            for member in members:
                if member.kname == group or (
                        wants_class and isinstance(member, group)):
                    matches.append(member)
        return matches

    def attr(
            self,
            the_type: Union[str, 'KElement', Tuple[str, str, str]],
            value: Union[str, 'KElement', Tuple[str, str, str]],
            date: Union[str, 'KElement', Tuple[str, str, str]],
            obs=None):
        """
        Utility function to include a KAttribute in this KGroup

        The call::

            group.attr('age', '25', '2021-08-08', obs='in May')

        is short hand for::

            group.include(KAttribute('age', '25', date='2021-08-08',
                                     obs='in May'))

        :param the_type: core or (core, comment, original)
        :param value: core or (core, comment, original)
        :param date: date as string in Kleio format,
            or (date, comment, original)
        :param obs: optional observation field
        :return: self, so calls can be chained
        """
        # KAttribute is defined after KGroup: look it up at call time.
        ka = globals()['KAttribute']
        self.include(ka(the_type, value, date=date, obs=obs))
        return self

    def rel(
            self,
            the_type: Union[str, tuple],
            value: Union[str, tuple],
            destname: Union[str, tuple],
            destination: Union[str, tuple],
            date: Union[str, tuple],
            obs: str = None):
        """
        Include a KRelation in this KGroup.

        :param the_type: type of the relation
        :param value: value of the relation
        :param destname: name of the destination of the relation
        :param destination: id of the destination of the relation
        :param date: date of the relation
        :param obs: optional observation field
        :return: self, so calls can be chained
        """
        # KRelation is defined after KGroup: look it up at call time.
        kr = globals()['KRelation']
        # `date` and `obs` are not positional elements of a relation, so
        # they must be passed by name.
        self.include(
            kr(the_type, value, destname, destination, date=date, obs=obs))
        return self

    def to_kleio(self, indent='') -> str:
        """Return a kleio representation of the group, enclosed groups
        included."""
        return self.__str__(indent=indent, recurse=True)

    def to_dict(self):
        """
        Return group information as a dict.

        Also available as property "get" so that
            group.to_dict()['id'] == group.get['id']

        Format of keys:
            group[element]: core value of element
            group[element_comment]: comment aspect of element
            group[element_original]: original aspect of element
            group[element_str]: string representation of element
                                (with # and % if necessary)
            group[element_kleio]: kleio representation element=string
            group[includes]: dict of enclosed groups, by kname
            group[includes][subgroup]: list of enclosed groups of
                                       type subgroup

        Enclosed subgroups can also be accessed in the plural form if
        there is no name conflict with existing elements:
            group[subgroup+'s'] == group[includes][subgroup]
        and, for subgroups with an id, as group[subgroup][id].
        """
        allowed = self.elements()
        kd = dict()
        for e in allowed:
            v = getattr(self, e, None)
            if v is None:
                continue
            if isinstance(v, KElement):
                core, comment, original = v.to_tuple()
                kd[e] = core
                kd[e + '_comment'] = comment
                kd[e + '_original'] = original
                kd[e + '_str'] = str(v)
                kd[e + '_kleio'] = v.to_kleio()
            else:
                kd[e] = v  # plain value, e.g. a class-level default

        # we now collect the enclosed groups by name
        ki = dict()
        for enclosed in self.includes():
            ki.setdefault(enclosed.kname, []).append(enclosed.to_dict())

        if ki:
            kd['includes'] = ki
            for subgroup, dicts in ki.items():
                # plural form, if there is no name conflict, so that
                # g['includes']['act'] can be accessed as g['acts']
                if subgroup + 's' not in allowed:
                    kd[subgroup + 's'] = dicts
                    # we include subgroups indexed by id so we can have
                    # source['act']['ac010']['person']['p01']
                    for d in dicts:
                        gid = d.get('id', None)
                        if gid is not None and subgroup not in allowed:
                            kd.setdefault(subgroup, dict())[gid] = d
        return kd

    @property
    def get(self):
        """Same as to_dict()."""
        return self.to_dict()

    def to_dots(self):
        """Return to_dict() wrapped in a Box."""
        return Box(self.to_dict())

    @property
    def dots(self):
        """Same as to_dots()."""
        return self.to_dots()

    @staticmethod
    def _has_data(value) -> bool:
        """True for a non-empty string or a non-empty KElement."""
        if isinstance(value, KElement):
            return not value.is_empty()
        return isinstance(value, str) and value != ''

    def __str__(self, indent="", recurse=False):
        """
        Return the group in Kleio notation: ``name$v1/v2/elem=v3``.

        Positional elements come first, by value only; the remaining
        elements follow as ``name=value`` in alphabetical order, with
        'obs' last. With recurse=True the enclosed groups follow, one
        per line. The result is indented with `indent`.
        """
        sname = getattr(self, '_name', self.__class__.__name__)

        fields = []
        shown = []
        for e in self._position:
            v = getattr(self, e, None)
            if v is not None:
                fields.append(str(v))
                shown.append(e)

        more = sorted(self.elements().difference(shown))
        if 'obs' in more:  # we like obs elements at the end
            more.remove('obs')
            more.append('obs')
        for e in more:
            m = getattr(self, e, None)
            if self._has_data(m):
                fields.append(f'{e}={str(m)}')

        s = sname + '$' + '/'.join(fields)

        if recurse:
            for g in self.includes():
                s = s + nl + g.__str__(indent + " ", recurse=recurse)
        return textwrap.indent(s, indent)

    def __getitem__(self, arg):
        """Return element `arg`; ValueError if it is not an allowed name."""
        if arg not in self.elements():
            raise ValueError("Element does not exist in group")
        return getattr(self, arg)

    def __setitem__(self, arg, value):
        """Set element `arg`; ValueError if it is not an allowed name.

        A value that is not a KElement is wrapped in one; a KElement is
        stored as is, with its name overridden by `arg`.
        """
        if arg not in self._position + self._guaranteed + self._also:
            raise ValueError(f'Element not allowed: {arg}')
        self._set_element(arg, value)

    def get_core(self, *args):
        """
        get_core(element_name [, default])

        Returns the core value of an element, or `default` (None if not
        given) when the element is missing or has no core.
        """
        element = args[0]
        default = args[1] if len(args) > 1 else None
        e = getattr(self, element, None)
        if e is None:
            return default
        return getattr(e, 'core', default)
class KKleio(KGroup):
    """
    KKleio(structure, prefix=, obs=, translations=, translator=)

    Kleio notation document. Represents a file in Kleio notation.

    Elements:
        structure: the path to a Kleio structure file
            (default gacto2.str)
        prefix: prefix to be added to all ids generated from this file
        translations: number of times this file was translated
        translator: name of the translator to be used
            (currently not used)
        obs: observations
    """

    _name = 'kleio'
    _position = ['structure']
    _also = ['prefix', 'translations', 'translator', 'obs']
    _part = ['source', 'aregister']
class KSource(KGroup):
    """
    Represents a Historical Source.

    Sources contain :class:`KAct` and may contain :class:`KAttribute`.

    Elements:
        id: a unique id for this source.
        type: the type of the source (e.g. baptisms, marriages);
            optional.
        loc: location (name of archive, library, collection); optional.
        ref: the call reference ("cota") of the source in the location;
            optional.
        date: the date of the source. A string in timelink format;
            optional.

            - 1582
            - 1582-05-04
            - 1582:1609
            - >1582:<1702

        year: a single year. A number. Deprecated, use date instead.
        obs: observations on the source (can be long and multiline);
            optional.
        replace: id of source to be replaced. A string; optional.
            The source with this id is removed before importing this
            one. Used when changing the id of a file: the old id should
            go here.

    Kleio str definition::

        part name=historical-source;
             guaranteed=id;
             also=type,date,year,loc,ref,obs,replace;
             position=id,year,type,ref;
             part=historical-act
    """

    _name = 'source'
    _guaranteed = ['id']
    _also = ['type', 'date', 'year', 'loc', 'ref', 'replace', 'obs']
    _position = ['id']
    _part = ['act', 'attr']


# A Kleio document may enclose sources.
KKleio.allow_as_part(KSource)
class KAct(KGroup):
    """
    KAct(id, type, date[, day=, month=, year=, loc=, ref=, obs=])

    An Act is a record of an event in a specific date.

    Elements:
        id: a unique id for this act. A string.
        type: type of the act (baptism, marriage, contract...).
            A string.
        date: the date of the act. A string in timelink format.
        day, month, year: the date expressed in individual values.
        loc: location of the act in the archive
            (if different from source).
        ref: call number, or page number in source.

    Kleio str definition::

        part name=historical-act;
             guaranteed=id,type,date;
             position=id,type,date;
             also=loc,ref,obs,day,month,year;
             arbitrary=person,object,geoentity,abstraction,ls,atr,rel
    """

    _name = 'act'
    _guaranteed = ['id', 'type', 'date']
    _position = ['id', 'type', 'date']
    _also = ['loc', 'ref', 'obs', 'day', 'month', 'year']
    _part = [
        'person',
        'object',
        'geoentity',
        'abstraction',
        'ls',
        'attr',
        'rel',
    ]


# A source may enclose acts.
KSource.allow_as_part(KAct)
class KPerson(KGroup):
    """
    KPerson(name, sex, id, obs=, same_as=, xsame_as=)

    Person in a historical source.

    Elements:
        name: the name of the person. A string.
        sex: the gender of the person. A string.
        id: a unique id for this person. A string, optional.
        obs: a note on the person. A string, optional.
        same_as: id of another instance of this person in the same file.
        xsame_as: id of another instance of this person in another file.

    Kleio str definition::

        part name=person;
             guaranteed=name,sex;
             also=id,obs,same_as;
             position=name,sex,id,same_as;
             arbitrary=atr,rel,ls
    """

    _name = 'person'
    _guaranteed = ['name', 'sex']
    _also = ['id', 'obs', 'same_as']
    _position = ['name', 'sex', 'id', 'same_as', 'xsame_as']
    _part = ['rel', 'attr']


# An act may enclose persons.
KAct.allow_as_part(KPerson)
class KObject(KGroup):
    """
    KObject(name, type, id=, obs=, same_as=, xsame_as=)

    An object in a historical source.

    Object groups represent physical entities like houses,
    pieces of land, movable objects.

    Elements:
        name: the name of the object. A string.
        type: the type of the object. A string.
        id: a unique id for this object. A string, optional.
        obs: a note on the object. A string, optional.
        same_as: id of another instance of this object in the same file.
        xsame_as: id of another instance of this object in another file.

    Kleio str definition::

        part name=object;
             guaranteed=name;
             position=name,type;
             also=obs,id,same_as,xsame_as;
             arbitrary=atr,ls,rel
    """

    _name = 'object'
    _guaranteed = ['name']
    _also = ['id', 'obs', 'same_as']
    # The second positional element is the object's `type`. It used to be
    # listed as 'sex', which left `type` unusable. The remaining positions
    # are kept so existing positional calls still work.
    _position = ['name', 'type', 'id', 'same_as', 'xsame_as']
    _part = ['rel', 'attr']


# An act may enclose objects.
KAct.allow_as_part(KObject)
class KAbstraction(KObject):
    """
    KAbstraction(name, type, id=, obs=, same_as=, xsame_as=)

    A synonym for object, used in non physical entities
    such as institutions.
    """


# An act may enclose abstractions.
KAct.allow_as_part(KAbstraction)
class KAttribute(KGroup):
    """
    KAttribute(type, value, [date, obs=])

    Time varying attribute of a person, object, or other.

    Attributes represent time-varying information about historical
    entities. Each attribute has a type ('address', 'profession',
    'age'), a value and a date associated with the attribute.

    Elements:
        type: the type of the attribute. A string.
        value: the value of the attribute. A string.
        date: the date of the attribute. A string in Timelink format,
            optional.
        obs: a note on the attribute. A string, optional.

    Kleio str definition::

        part name=attribute;
             guaranteed=type,value;
             also=obs,date;
             position=type,value,date
    """

    _name = 'attr'
    _guaranteed = ['type', 'value']
    _also = ['date', 'obs']
    _position = ['type', 'value', 'date']


# Persons, acts and sources may enclose attributes.
KPerson.allow_as_part(KAttribute)
KAct.allow_as_part(KAttribute)
KSource.allow_as_part(KAttribute)
class KLs(KAttribute):
    """Synonym for KAttribute, with the group name 'ls'."""

    _name = 'ls'
class KAtr(KAttribute):
    """Synonym for KAttribute, with the group name 'atr'."""

    _name = 'atr'
class KRelation(KGroup):
    """
    KRelation(type, value, destname, destination[, date=, obs=])

    A relation between historical entities.

    Relations have a type, a value, a date and a destination.
    The origin of the relation is the entity represented by the
    group that includes the relation.

    Elements:
        type: the type of the relation. A string.
        value: the value of the relation. A string.
        destination: the id of the destination of the relation.
            A string.
        destname: the name of the destination of the relation.
            A string.
        date: the date of the relation. A string in Timelink format,
            optional.
        obs: a note on the relation. A string, optional.

    Kleio str definition::

        part name=relation;
             position=type,value,destname,destination;
             guaranteed=type,value,destname,destination;
             also=obs,date
    """

    _name = 'rel'
    _position = ['type', 'value', 'destname', 'destination']
    _guaranteed = ['type', 'value', 'destname', 'destination']
    _also = ['obs', 'date']


# Persons, acts and sources may enclose relations.
KPerson.allow_as_part(KRelation)
KAct.allow_as_part(KRelation)
KSource.allow_as_part(KRelation)