Source code for timelink.networks.network_generation

"""Generation of networks"""

from itertools import combinations

import networkx as nx

from timelink.api.database import TimelinkDatabase
from timelink.api.models.entity import Entity
from timelink.kleio.utilities import convert_timelink_date as ctd
from timelink.kleio.utilities import format_timelink_date as ftd
from timelink.pandas import attribute_values, entities_with_attribute


def _text_or_empty(value) -> str:
    """Return ``value`` as text, mapping missing values (None/NaN/NA) to ``""``."""
    if isinstance(value, str):
        return value
    if value is None:
        return ""
    try:
        # NaN-like values are the only ones that compare unequal to themselves.
        if value != value:
            return ""
    except (TypeError, ValueError):
        # The comparison result has no truth value (e.g. a pandas NA marker).
        return ""
    return str(value)


def _quote_if_colon(text):
    """Wrap ``text`` in double quotes when it is a string containing a colon."""
    if isinstance(text, str) and ":" in text:
        return f'"{text}"'
    return text


def _link_entities_to_value(
    G: nx.Graph, mysession, entities, attribute: str, avalue, by_year: bool
) -> None:
    """Add a node for ``avalue`` and link it to every entity row ("value-node" mode)."""
    date_col = f"{attribute}.date"
    obs_col = f"{attribute}.obs"

    # The value node is created even if no entity can be fetched afterwards.
    G.add_node(avalue, id=avalue, desc=avalue, type=attribute)

    for idx, row in entities.iterrows():
        entity: Entity | None = mysession.get(Entity, idx)  # type: ignore
        if entity is None:
            continue

        date_value = ftd(row[date_col]) if date_col in row else None
        obs_value = row[obs_col] if obs_col in row else None

        G.add_node(
            idx,
            desc=entity.description,
            id=entity.id,
            type=entity.pom_class,
            group=entity.groupname,
            date=date_value,
            source=entity.the_source,
        )

        if by_year and date_value:
            # Route the link through a year node: value -- year -- entity.
            year = ctd(date_value).year
            G.add_node(year, type="year", desc=str(year))
            G.add_edge(year, idx, date=date_value, obs=obs_value)
            # Keep the attributes of the first value--year edge that was added.
            if not G.has_edge(avalue, year):
                G.add_edge(avalue, year, date=date_value, obs=obs_value)
        else:
            # No year requested (or no usable date): link entity to the value.
            G.add_edge(
                avalue,
                idx,
                date=date_value,
                attribute=attribute,
                value=avalue,
                obs=obs_value,
            )


def _link_entities_as_clique(
    G: nx.Graph, mysession, entities, attribute: str, avalue
) -> None:
    """Connect all entities sharing ``avalue`` pairwise ("cliques" mode)."""
    date_col = f"{attribute}.date"
    obs_col = f"{attribute}.obs"

    unique_ids = entities.index.unique()
    for entity_id in unique_ids:
        entity: Entity | None = mysession.get(Entity, entity_id)  # type: ignore
        if entity is not None:
            G.add_node(
                entity_id,
                desc=entity.description,
                group=entity.groupname,
                type=entity.pom_class,
                source=entity.the_source,
            )

    if len(unique_ids) < 2:
        # A single entity has nobody to be connected to.
        return

    # An entity may appear on several rows; keep its first row so that the
    # ``.at`` lookups below always yield a scalar instead of a Series.
    first_rows = entities[~entities.index.duplicated(keep="first")]
    has_date = date_col in first_rows.columns
    has_obs = obs_col in first_rows.columns

    # Resolve date/obs once per entity instead of once per pair.
    details = {}
    for entity_id in unique_ids:
        raw_date = first_rows.at[entity_id, date_col] if has_date else ""
        date = ftd(raw_date) if raw_date else ""
        obs = _text_or_empty(first_rows.at[entity_id, obs_col]) if has_obs else ""
        # Values containing ":" are wrapped in double quotes.
        details[entity_id] = (_quote_if_colon(date), _quote_if_colon(obs))

    # Every pair gets an edge, including the case of exactly two entities.
    for id1, id2 in combinations(unique_ids, 2):
        date1, obs1 = details[id1]
        date2, obs2 = details[id2]
        G.add_edge(
            id1,
            id2,
            date1=date1,
            date2=date2,
            attribute=attribute,
            value=avalue,
            obs1=obs1,
            obs2=obs2,
        )


def network_from_attribute(
    attribute: str,
    ignore_values: list[str] | None = None,
    mode="cliques",
    by_year=False,  # add year nodes between entities and values
    user=None,
    db: TimelinkDatabase | None = None,
    session=None,
) -> nx.Graph:
    """Generate a network from common attribute values.

    Connects the entities that have the same value for the attribute
    given in ``attribute``.

    Args:
        attribute (str): The name (type) of the attribute used for
            generating the graph.
        ignore_values (list[str], optional): Values to ignore when
            generating the network. Defaults to None, in which case
            ``["?"]`` is used.
        mode (str, optional): The topology of the generated network (see
            below). Recognised values are "cliques" and "value-node".
            Any other value adds no nodes or edges. Defaults to "cliques".
        by_year (bool, optional): Only used in "value-node" mode. When
            True and the attribute has a date, a year node is inserted
            between the value node and the entity. Defaults to False.
        user (str, optional): Currently not used by this function.
            Defaults to None.
        db (TimelinkDatabase, optional): The database object. Defaults
            to None. Either db or session must be provided.
        session (object, optional): The session object for the database
            connection; takes precedence over ``db``. Defaults to None.
            The session is entered as a context manager for the duration
            of the call.

    Raises:
        ValueError: If neither ``db`` nor ``session`` is provided.

    Topology of the generated network:

    * If mode = "cliques" the entities with the attribute become nodes
      and an edge is created between every pair of entities with the
      same value of the attribute.
    * If mode = "value-node" a node is created for each different value
      of the attribute and edges link that node to the entities which
      have that value (through a year node when ``by_year`` applies).
    * In both cases entities with several values for the attribute
      contribute to the overall connectivity of the graph, by linking
      clusters of same value entities.

    Returns:
        networkx.classes.graph.Graph: The generated network. An empty
        graph is returned when there are no values for the attribute.

        Entity nodes carry "desc", "type" (entity class), "group" and
        "source"; in "value-node" mode also "id" and "date". Value nodes
        carry "id", "desc" and "type" (the attribute name). Year nodes
        carry "type" = "year" and "desc".

        Edges in "cliques" mode carry "date1", "date2", "obs1", "obs2",
        "attribute" and "value". Edges in "value-node" mode carry "date"
        and "obs", plus "attribute" and "value" on direct value-entity
        links.

    Examples:
        Generate a network of people that graduated in the same place

        ``G = network_from_attribute("graduated_at", mode="value-node")``
    """
    G = nx.Graph()

    if session is not None:
        mysession = session
    elif db is not None:
        mysession = db.session()
    else:
        raise ValueError(
            "No database nor session. Specifcy db=TimeLinkDatabase() or "
            "session=database session."
        )

    if ignore_values is None:
        ignore_values = ["?"]

    with mysession:
        attribute_values_list = attribute_values(
            the_type=attribute,
            db=db,
            session=mysession,
        )
        if attribute_values_list.empty:
            return G

        # Drop the values the caller asked to ignore.
        attribute_values_list = attribute_values_list[
            ~attribute_values_list.index.isin(ignore_values)
        ]
        if attribute_values_list.empty:
            return G

        for avalue in attribute_values_list.index:
            entities = entities_with_attribute(
                the_type=attribute,
                the_value=avalue,
                db=db,
                session=mysession,
            )
            if entities is None or entities.empty:
                continue

            if mode == "value-node":
                _link_entities_to_value(
                    G, mysession, entities, attribute, avalue, by_year
                )
            elif mode == "cliques":
                _link_entities_as_clique(G, mysession, entities, attribute, avalue)

    return G