"""
.. module:: groups
:synopsis: Classes for handling Kleio groups and elements.
.. moduleauthor:: Joaquim Ramos de Carvalho
Kleio Groups are the building blocks for transcription of historical sources.
"""
import textwrap
from os import linesep as nl
from typing import Any, Union, Type, Tuple
from box import Box
from timelink.kleio.utilities import quote_long_text
class KElement:
    """An Element of the Kleio language.

    While *Groups* represent historical entities (people, objects, events),
    *Elements* carry the basic items of information (name, gender, date).

    The value of an Element has up to three "aspects":

    1) "core": the actual information for the element
    2) "original" (optional): the original wording, when relevant
    3) "comment" (optional): a comment on the value

    Example in Kleio notation::

        person$Joaquim Carvalho%Joachim Carvº#Family name added in the margin

    Can be generated by ::

        KElement('name','Joaquim Carvalho',original='Joachim Carvº',
                 comment='Family name added in the margin')

        n = KElement('name','Joaquim Carvalho',original='Joachim Carvº',
                     comment='Family name added in the margin')
        id = KElement('id','p-jrc')
        person = KPerson(id=id,name=n)
    """

    name: str
    core: Any  # must have a str representation.
    comment: str = None
    original: str = None
    _source: str = None

    def __init__(self, name: str, val: Any, comment=None, original=None):
        """
        Args:
            name: name of the Element. A string.
            val: the core aspect of the Element (anything with __str__),
                or a 3-tuple (core, comment, original). When a 3-tuple is
                given the optional arguments are disregarded.
            comment: Optional; the comment aspect of the Element.
            original: Optional; the original aspect of the Element.
        """
        self.name = name
        # Only an exact tuple of length 3 is unpacked into the three aspects;
        # any other value (including shorter tuples) is stored as the core.
        is_triple = type(val) is tuple and len(val) == 3
        if is_triple:
            self.core, self.comment, self.original = val
        else:
            self.core = val
            if comment is not None:
                self.comment = comment
            if original is not None:
                self.original = original

    def __str__(self):
        """Render as ``core#comment%original``, omitting aspects that are None."""
        core_text = (
            '' if self.core is None
            else quote_long_text(str(self.core)))
        comment_text = (
            '' if self.comment is None
            else '#' + quote_long_text(str(self.comment)))
        original_text = (
            '' if self.original is None
            else '%' + quote_long_text(str(self.original)))
        return core_text + comment_text + original_text

    def is_empty(self):
        """True if all aspects of the element are None or empty string"""
        aspects = (self.core, self.comment, self.original)
        return all(aspect is None or aspect == '' for aspect in aspects)

    def to_tuple(self):
        """Return Element as a tuple (core, comment, original)"""
        return self.core, self.comment, self.original

    def to_kleio(self):
        """Return element as a kleio string: element=core#comment%original"""
        return self.name + '=' + str(self)

    def to_dict(self, name=False):
        """Return Element as a dict {core:_, comment:_, original:_}

        Pass name=True to add the element name to the dictionary:
        {name:_, core:_, comment:_, original:_}
        """
        aspects = {
            'core': self.core,
            'comment': self.comment,
            'original': self.original,
        }
        if name:
            # 'name' goes first, followed by the three aspects.
            return {'name': self.name, **aspects}
        return aspects

    def to_dots(self):
        """Return the aspects dict wrapped in a ``Box`` (dot access)."""
        return Box(self.to_dict())
class KGroup:
    """
    KGroup(*positional_elements ,**more_elements)

    Abstract Kleio Group.

    To define a Kleio Group extend this class and set a default value
    for _name. Define the allowed elements in the default values of
    _position, _guaranteed, _also (lists of strings).
    Use _part to list the groups that may be enclosed; entries are either
    group names (strings) or KGroup subclasses.

    For an example see timelink.kleio.groups.KPerson
    """

    # Class-level placeholder: an instance that never receives an explicit
    # 'id' element still exposes this string through attribute lookup.
    id: str = '*id*'
    _name: str = 'kgroup'
    _position: list = []
    _guaranteed: list = []
    _also: list = []
    _part: list = []
    _extends: Type['KGroup']

    # TODO to_kleio_str generates the definition of a group
    #      for a kleio str file. recurse=yes
    #      collects included groups and generates for those also.

    @property
    def kname(self):
        """Name of the group in Kleio notation (the value of ``_name``)."""
        return self._name

    @staticmethod
    def _as_list(value, inherited):
        """Normalise an ``extend`` argument to a fresh list.

        None means "inherit" (a copy of the parent's list), a single string
        becomes a one-element list and any other iterable is copied.
        """
        if value is None:
            return list(inherited)
        if isinstance(value, str):
            return [value]
        return list(value)

    @classmethod
    def extend(
            cls,
            name: str,
            position: Union[list, str, None] = None,
            guaranteed: Union[list, str, None] = None,
            also: Union[list, str, None] = None,
            part: Union[list, str, None] = None):
        """Create a new group class extending this one.

        Example::

            fonte = KGroup.extend('fonte',
                                  also=['tipo',
                                        'data',
                                        'ano',
                                        'obs',
                                        'substitui'])

        Args:
            name: kleio name of the new group; also used as class name.
            position: positional element names. None inherits from cls.
            guaranteed: compulsory element names. None inherits from cls.
            also: optional element names. None inherits from cls.
            part: names or classes of groups that can be enclosed.
                None inherits from cls.

        A single string is accepted for any of the lists and is treated as
        a one-element list. Lists are copied, so the new class never shares
        a list object with the caller or with the parent class.

        Returns:
            The newly created subclass of cls.
        """
        new_group = type(name, (cls,), {})
        new_group._name = name
        # todo: k,v in kwargs if in cls set if not error
        new_group._position = cls._as_list(position, cls._position)
        new_group._guaranteed = cls._as_list(guaranteed, cls._guaranteed)
        new_group._also = cls._as_list(also, cls._also)
        new_group._part = cls._as_list(part, cls._part)
        new_group._extends = cls
        return new_group

    @classmethod
    def get_subclasses(cls):
        """Yield every direct and indirect subclass, deepest first."""
        for subclass in cls.__subclasses__():
            yield from subclass.get_subclasses()
            yield subclass

    @classmethod
    def all_subclasses(cls):
        """Return the subclasses produced by ``get_subclasses`` as a list."""
        return list(cls.get_subclasses())

    @classmethod
    def is_kgroup(cls, g):
        """True if g is an instance of KGroup or of one of its subclasses.

        The test is made on class names along the MRO of type(g). Passing
        something that is not such an instance (including a class object)
        returns False.
        """
        return any(c.__name__ == 'KGroup' for c in type(g).__mro__)

    @classmethod
    def elements(cls) -> set:
        """Set of Elements allowed in this Group"""
        return set(cls._guaranteed).union(set(cls._also)).union(
            cls._position)

    @classmethod
    def allow_as_part(cls, g: Union[str, type]):
        """Allow g to be enclosed as part of this group.

        Arguments:
            g: the name of a group, or a subclass of KGroup.
                A string or class.
        """
        # A class without its own _part would otherwise append to the list
        # it inherits, silently changing the parent and sibling classes.
        if '_part' not in cls.__dict__:
            cls._part = list(cls._part)
        if g not in cls._part:
            cls._part.append(g)

    def __init__(self, *args, **kwargs):
        """Create a group from positional and keyword elements.

        Positional values are assigned to the element names in _position,
        in order. Keyword names must be listed in _position, _guaranteed
        or _also. Values that are not KElement are wrapped in one; KElement
        values are used as given, with their name set to the element name.

        Raises:
            ValueError: too many positional values or unknown keyword.
            TypeError: an element in _guaranteed is missing or None.
        """
        self._containsd: dict = {}
        if len(args) > len(self._position):
            raise ValueError('Too many positional elements')
        # set the positional arguments according to "_position"
        for element_name, value in zip(self._position, args):
            self._set_element(element_name, value)
        # keyword arguments must be in one the element lists
        for element_name, value in kwargs.items():
            self._set_element(element_name, value)
        # test if the compulsory (guaranteed) elements are present
        for g in self._guaranteed:
            if getattr(self, g, None) is None:
                raise TypeError(
                    f'Element {g} in _guaranteed '
                    f'is missing or with None value')

    def _set_element(self, element_name, value):
        """Validate element_name and store value under it as a KElement."""
        if element_name not in (
                self._position + self._guaranteed + self._also):
            raise ValueError(f'Element not allowed: {element_name}')
        if isinstance(value, KElement):
            element = value
            # the element name is overridden with the name used here
            element.name = element_name
        else:
            element = KElement(element_name, value)
        setattr(self, element_name, element)

    def include(self, group: 'KGroup'):
        """Include a group.

        The kname of `group`, its class, or one of its super classes must
        be in the _part list of this group.

        Included groups are kept in a dict of lists keyed by the entry of
        _part that allowed the inclusion (see is_allowed_as_part).

        Returns self so it is possible to chain: g.include(g2).include(g3)

        Raises:
            ValueError: the group is not allowed as part of this one.
            TypeError: the argument is not a KGroup instance.
        """
        allowed = self.is_allowed_as_part(group)
        if allowed is None:
            raise ValueError(
                f'Group {self.kname} cannot contain {group.kname}')
        self._containsd.setdefault(allowed, []).append(group)
        return self

    def is_allowed_as_part(self, group):
        """Test if a group can be included in the current one.

        For a group to be allowed one of 3 conditions is necessary:

        1. the kname of the group is in self._part
        2. the type of the group is in self._part
        3. the type of the group inherits from a type in self._part

        Returns the key under which the group is allowed: the kname, or
        the most specific class (first in the MRO of the group's type)
        found in self._part. Returns None if not allowed.

        Raises:
            TypeError: group is not an instance of a KGroup class.
        """
        if not self.is_kgroup(group):
            raise TypeError("Argument must be subclass of KGroup")
        if group.kname in self._part:
            return group.kname
        allowed_classes = {c for c in self._part if type(c) is not str}
        # Walk the MRO in order so that the result is deterministic when
        # more than one ancestor is listed in _part.
        for klass in type(group).__mro__:
            if klass in allowed_classes:
                return klass
        return None

    def includes(
            self,
            group: Union[str, Type['KGroup'], None] = None) -> list:
        """Returns included groups.

        Without argument, groups are returned by the order in _part.

        With an argument, if it is one of the keys under which groups were
        included (see include) the list for that key is returned. Otherwise
        the groups that were included under a class key are searched and
        those matching the argument are returned: by kname when the
        argument is a string, by isinstance when it is a class.

        TODO this would better be a generator, yield instead of extend

        :param group: filter by group name or class
        """
        if group is None:
            by_part_order = []
            for p in self._part:
                if p in self._containsd:
                    by_part_order.extend(self._containsd[p])
            return by_part_order

        if group in self._containsd:
            return self._containsd[group]

        filter_is_class = isinstance(group, type)
        matches = []
        for key, members in self._containsd.items():
            if not hasattr(key, 'kname'):  # string keys were checked above
                continue
            for member in members:
                if filter_is_class:
                    if isinstance(member, group):
                        matches.append(member)
                elif member.kname == group:
                    matches.append(member)
        return matches

    def attr(
            self,
            the_type: Union[str, 'KElement', Tuple[str, str, str]],
            value: Union[str, 'KElement', Tuple[str, str, str]],
            date: Union[str, 'KElement', Tuple[str, str, str]],
            obs=None):
        """Utility function to include a KAttribute in this KGroup

        The call::

            KGroup.attr('age','25','2021-08-08',obs='in May')

        is short hand for::

            KGroup.include(KAttr('age','25','2021-08-08',obs='in May'))

        :param the_type: core or (core, comment, original)
        :param value: core or (core, comment, original)
        :param date: date as string in Kleio format,
            or (date, comment, original)
        :param obs: optional observation field
        :return: self, so that calls can be chained
        """
        # KAttribute is defined after this class; look it up at call time.
        ka = globals()['KAttribute']
        self.include(ka(the_type, value, date=date, obs=obs))
        return self

    def rel(
            self,
            the_type: Union[str, tuple],
            value: Union[str, tuple],
            destname: Union[str, tuple],
            destination: Union[str, tuple],
            date: Union[str, tuple],
            obs: str = None):
        """Include a relation in this KGroup.

        Short hand for including
        KRelation(the_type, value, destname, destination, date=, obs=).

        :return: self, so that calls can be chained
        """
        # KRelation is defined after this class; look it up at call time.
        kr = globals()['KRelation']
        # date and obs are not positional elements of KRelation, so they
        # have to be passed by keyword.
        self.include(
            kr(the_type, value, destname, destination, date=date, obs=obs))
        return self

    def to_kleio(self, indent='') -> str:
        """Return a kleio representation of the group, with included groups."""
        return self.__str__(indent=indent, recurse=True)

    def to_dict(self):
        """Return group information as a dict.

        Also available as property "get" so that
            group.to_dict()['id'] == group.get['id']

        Format of keys:

            group[element]: core value of element
            group[element_comment]: comment aspect of element
            group[element_original]: original aspect of element
            group[element_str] : string representation of element
                                 (with # and % if necessary)
            group[element_kleio]: kleio representation element=string
            group[includes]: dict of enclosed groups
            group[includes][subgroup]: list of enclosed groups
                                       of type subgroup

        Enclosed subgroups can also be accessed in the plural form
        if there is no name conflict with existing elements:

            group[subgroup+'s'] == group[includes][subgroup]

        and, when they have an 'id', indexed by it:

            group[subgroup][id]
        """
        allowed = self.elements()
        kd = dict()
        for e in allowed:
            v = getattr(self, e, None)
            if v is None:
                continue
            if type(v) is KElement:
                core, comment, original = v.to_tuple()
                kd[e] = core
                kd[e + '_comment'] = comment
                kd[e + '_original'] = original
                kd[e + '_str'] = str(v)
                kd[e + '_kleio'] = v.to_kleio()
            else:
                # not a KElement (e.g. the class level default of 'id')
                kd[e] = v

        # collect the included groups by name
        ki = dict()
        for included in self.includes():
            ki.setdefault(included.kname, []).append(included.to_dict())

        if len(ki) > 0:
            kd['includes'] = ki
            # if there are no name conflicts add the plural form
            # so g['includes']['act'] can be accessed as g['acts']
            for subgroup, dicts in ki.items():
                if subgroup + 's' in allowed:
                    continue
                kd[subgroup + 's'] = dicts
                # we include subgroup indexed by id
                # so we can have source['act']['ac010']['person']['p01']
                for group in dicts:
                    gid = group.get('id', None)
                    if gid is not None and subgroup not in allowed:
                        if subgroup not in kd:
                            kd[subgroup] = dict()
                        kd[subgroup][gid] = group
        return kd

    @property
    def get(self):
        """Same as to_dict()."""
        return self.to_dict()

    def to_dots(self):
        """Return to_dict() wrapped in a ``Box`` (dot access)."""
        return Box(self.to_dict())

    @property
    def dots(self):
        """Same as to_dots()."""
        return self.to_dots()

    def __str__(self, indent="", recurse=False):
        """Kleio notation for this group.

        Positional elements come first, without name, followed by the
        other elements that have content as name=value, sorted by name
        with 'obs' last. Items are separated by '/'.

        Args:
            indent: prefix added to each line of the result.
            recurse: if True included groups are appended, one per line.
        """
        group_name = getattr(self, '_name', self.__class__.__name__)
        pieces = []
        shown = []
        for e in self._position:
            v = getattr(self, e, None)
            if v is not None:
                pieces.append(str(v))
                shown.append(e)

        remaining = sorted(self.elements().difference(shown))
        if 'obs' in remaining:  # we like obs elements at the end
            remaining.remove('obs')
            remaining.append('obs')
        for e in remaining:
            m = getattr(self, e, None)
            has_text = type(m) is str and m > ''
            has_element = isinstance(m, KElement) and not m.is_empty()
            if has_text or has_element:
                # m contains data, lets output
                pieces.append(f'{e}={str(m)}')

        s = group_name + '$' + '/'.join(pieces)
        if recurse:
            for g in self.includes():
                s = s + nl + g.__str__(indent + " ", recurse=recurse)
        return textwrap.indent(s, indent)

    def __getitem__(self, arg):
        """Return the element named arg.

        Raises:
            ValueError: arg is not an element of this group.
        """
        if arg not in self.elements():
            raise ValueError("Element does not exist in group")
        return getattr(self, arg)

    def __setitem__(self, arg, value):
        """Set element arg; value is wrapped in a KElement if necessary.

        Raises:
            ValueError: arg is not an element allowed in this group.
        """
        self._set_element(arg, value)

    def get_core(self, *args):
        """get_core(element_name [, default])

        Returns the core value of an element, or default (None if not
        given) when the element is missing or has no core.
        """
        element = args[0]
        default = args[1] if len(args) > 1 else None
        e = getattr(self, element, None)
        if e is None:
            return default
        return getattr(e, 'core', default)
class KKleio(KGroup):
    """KKleio(structure,prefix=,obs=,translations=,translator=)

    Kleio notation document. Represents a file in Kleio notation.

    Elements:
        structure: The path to a Kleio structure file (default gacto2.str)
        prefix: Prefix to be added to all ids generated from this file
        translations: number of times this file was translated
        translator: name of the translator to be used (currently not used)
        obs: observations
    """

    _name = 'kleio'
    _position = ['structure']
    _also = [
        'prefix',
        'translations',
        'translator',
        'obs',
    ]
    # groups that may be enclosed, by name
    _part = [
        'source',
        'aregister',
    ]
class KSource(KGroup):
    """Represents an Historical Source. Sources contain :class:`KAct` and
    may contain :class:`KAttribute`.

    Elements

    id
        An unique id for this source.
    type
        The type of the source (e.g. baptisms, marriages); optional.
    loc
        Location (name of archive, library, collection); optional.
    ref
        The call reference ("cota") of the source in the location; optional.
    date
        The date of the source. A string in timelink format; optional.

        - 1582
        - 1582-05-04
        - 1582:1609
        - >1582:<1702
    year
        A single year. A number. Deprecated, use date instead
    obs
        Observations on the source (can be long and multiline); optional.
    replace
        Id of source to be replaced. A string; optional.
        The source with this id is removed before importing this one.
        Used when changing the id of a file, old id should go here.

    Kleio str definition::

        part name=historical-source;
             guaranteed=id;
             also=type,date,year,loc,ref,obs,replace;
             position=id,year,type,ref;
             part=historical-act

    In this class only ``id`` is positional; the other elements are given
    by keyword.
    """

    _name = 'source'
    _position = ['id']
    _guaranteed = ['id']
    _also = [
        'type',
        'date',
        'year',
        'loc',
        'ref',
        'replace',
        'obs',
    ]
    # groups that may be enclosed, by name
    _part = ['act', 'attr']
# Add the KSource class itself (besides the name 'source') to the parts of KKleio.
KKleio.allow_as_part(KSource)
class KAct(KGroup):
    """KAct(id,type,date[,day=,month=,year=,loc=,ref=,obs=])

    An Act is a record of an event in a specific date.

    Elements:
        id: an unique id for this act. A string.
        type: type of the act (baptism, marriage, contract...). A string.
        date: the date of the act. A string in timelink format.
        day,month,year: the date expressed in individual values
        loc: location of the act in the archive (if different from source)
        ref: call number, or page number in source.

    Kleio str definition:

        part name=historical-act;
             guaranteed=id,type,date;
             position=id,type,date;
             also=loc,ref,obs,day,month,year;
             arbitrary=person,object,geoentity,abstraction,ls,atr,rel
    """

    _name = 'act'
    _position = ['id', 'type', 'date']
    _guaranteed = ['id', 'type', 'date']
    _also = [
        'loc',
        'ref',
        'obs',
        'day',
        'month',
        'year',
    ]
    # groups that may be enclosed, by name
    _part = [
        'person',
        'object',
        'geoentity',
        'abstraction',
        'ls',
        'attr',
        'rel',
    ]
# Add the KAct class itself (besides the name 'act') to the parts of KSource.
KSource.allow_as_part(KAct)
class KPerson(KGroup):
    """KPerson(name,sex,id,obs=,same_as=,xsame_as=)

    Person in a historical source

    Elements:
        name: the name of the person. A string.
        sex: the gender of the person. A string.
        id: an unique id for this person. A string, optional.
        obs: a note on the person. A string, optional.
        same_as: id of another instance of this person in the same file.
        xsame_as: id of another instance of this person in another file.

    Kleio str definition:

        part name=person ;
             guaranteed=name,sex;
             also=id,obs,same_as;
             position=name,sex,id,same_as;
             arbitrary=atr,rel,ls
    """

    _name = 'person'
    _position = [
        'name',
        'sex',
        'id',
        'same_as',
        'xsame_as',
    ]
    _guaranteed = ['name', 'sex']
    _also = ['id', 'obs', 'same_as']
    # groups that may be enclosed, by name
    _part = ['rel', 'attr']
# Add the KPerson class itself (besides the name 'person') to the parts of KAct.
KAct.allow_as_part(KPerson)
class KObject(KGroup):
    """KObject(name,type,id=,obs=,same_as=,xsame_as=)

    An object in a historical source.

    Object groups represent physical entities like
    houses, pieces of land, movable objects

    Elements:
        name: the name of the object. A string.
        type: the type of the object. A string.
        id: an unique id for this object. A string, optional.
        obs: a note on the object. A string, optional.
        same_as: id of another instance of this object in the same file.
        xsame_as: id of another instance of this object in another file.

    Kleio str definition:

        part name=object;
             guaranteed=name;
             position=name,type;
             also=obs,id,same_as,xsame_as;
             arbitrary=atr,ls,rel

    In this class id, same_as and xsame_as are also accepted as positional
    values after name and type.
    """

    _name = 'object'
    _guaranteed = ['name']
    _also = ['id', 'obs', 'same_as']
    # The second positional element is 'type', as documented above (it was
    # 'sex', copied from KPerson). The remaining positions are kept so that
    # existing positional calls keep working.
    _position = ['name', 'type', 'id', 'same_as', 'xsame_as']
    # groups that may be enclosed, by name
    _part = ['rel', 'attr']
# Add the KObject class itself (besides the name 'object') to the parts of KAct.
KAct.allow_as_part(KObject)
class KAbstraction(KObject):
    """KAbstraction(name,type,id=,obs=,same_as=,xsame_as=)

    A synonym for object, used for non physical entities such as
    institutions. Nothing is overridden: name, elements and parts
    are those of :class:`KObject`.
    """
# Add the KAbstraction class to the parts of KAct.
KAct.allow_as_part(KAbstraction)
class KAttribute(KGroup):
    """KAttribute(type,value,[date, obs=])

    Time varying attribute of a person, object, or other

    Attributes represent time-varying information about historical entities.
    Each attribute has a type ('address','profession', 'age'), a value and
    a date associated with the attribute.

    Elements:
        type: the type of the attribute. A String
        value: the value of the attribute. A string.
        date: the date of attribute. A string in Timelink format, optional.
        obs: a note on the attribute. A string optional.

    Kleio str definition :

        part name=attribute ;
             guaranteed=type,value ;
             also=obs,date ;
             position=type,value,date
    """

    _name = 'attr'
    _position = [
        'type',
        'value',
        'date',
    ]
    _guaranteed = ['type', 'value']
    _also = ['date', 'obs']
# Add the KAttribute class itself (besides the name 'attr') to the parts
# of persons, acts and sources.
KPerson.allow_as_part(KAttribute)
KAct.allow_as_part(KAttribute)
KSource.allow_as_part(KAttribute)
class KLs(KAttribute):
    """Synonym for KAttribute, with the kleio name 'ls'."""

    _name = 'ls'
class KAtr(KAttribute):
    """Synonym for KAttribute, with the kleio name 'atr'."""

    _name = 'atr'
class KRelation(KGroup):
    """KRelation(type,value,destname,destination[,date=,obs=])

    A relation between historical entities.

    Relations have a type, a value, a date and a destination.
    The origin of the relation is the entity represented by the
    group that includes the relation.

    Elements:
        type: the type of the relation. A String
        value: the value of the relation. A string.
        destination: the id of the destination of the relation. A string.
        destname: the name of the destination of the relation. A string
        date: the date of relation. A string in Timelink format, optional.
        obs: a note on the relation. A string optional.

    Kleio str definition:

        part name=relation ;
             position=type,value,destname,destination;
             guaranteed=type,value,destname,destination ;
             also=obs,date
    """

    _name = 'rel'
    _guaranteed = [
        'type',
        'value',
        'destname',
        'destination',
    ]
    _position = [
        'type',
        'value',
        'destname',
        'destination',
    ]
    _also = ['obs', 'date']
# Add the KRelation class itself to the parts of persons, acts and sources.
KPerson.allow_as_part(KRelation)
KAct.allow_as_part(KRelation)
KSource.allow_as_part(KRelation)