Source code for derevo.combined

"""
Functions for building the combined graph and exporting it in GEXF format are defined here.
"""
from __future__ import annotations

from io import BytesIO

import networkx as nx
import pandas as pd

from derevo.adjacency import get_adjacency_graph


def get_combined_graph(
    plants: pd.DataFrame,
    cohabitation_attributes: pd.DataFrame,
    species_in_parks: pd.DataFrame,
    target_parks: list[str] | None = None,
) -> nx.Graph:
    """
    Return the combined graph with weights equal to the number of co-occurrence cases
    and the compatibility outcome stored in edge attributes.

    Every unordered pair of plants with different ``name_ru`` becomes one edge. Its
    ``cohabitation_type`` attribute is looked up in `cohabitation_attributes` by the
    ``"<genus_id>:<genus_id>"`` key of the pair (``"neutral"`` when no row matches) and
    is overwritten with ``"has_cases"`` when the adjacency graph provides a weight for
    the pair. Its ``weight`` attribute is that adjacency weight, or ``0.1`` when absent.

    Args:
        plants: columns read here are ``name_ru``, ``genus_id``, ``name_latin``, ``id``,
            ``is_invasive`` and ``plant_type``.
        cohabitation_attributes: columns read here are ``genus_id_1``, ``genus_id_2``
            and ``cohabitation_type``.
        species_in_parks: passed unchanged to ``get_adjacency_graph``.
        target_parks: passed to ``get_adjacency_graph``; when not None, only pairs whose
            first plant appears in the ``source`` column of the adjacency edge list are kept.

    Returns:
        A ``networkx.MultiGraph`` whose nodes are ``name_ru`` values carrying the
        ``name_latin``, ``id``, ``is_invasive`` and ``plant_type`` attributes.
    """
    # Weight given to pairs without an adjacency edge; also the sentinel used to detect them.
    no_cases_weight = 0.1

    plants = plants.copy()
    cohabitation = cohabitation_attributes.copy()

    # All ordered pairs of distinct plants; right-hand columns get the "_x" suffix.
    df_comp = plants.join(plants, how="cross", rsuffix="_x")
    df_comp = df_comp[df_comp["name_ru"] != df_comp["name_ru_x"]]
    df_comp = df_comp.assign(
        genus_con=df_comp["genus_id"].astype(int).astype(str) + ":" + df_comp["genus_id_x"].astype(int).astype(str)
    )
    cohabitation["genus_con"] = cohabitation["genus_id_1"].astype(str) + ":" + cohabitation["genus_id_2"].astype(str)
    df_comp = df_comp.merge(cohabitation[["cohabitation_type", "genus_con"]], on="genus_con", how="left")

    # Keep the first row of each unordered {name_ru, name_ru_x} pair.
    df_comp = df_comp[~df_comp.filter(like="name_ru").apply(frozenset, axis=1).duplicated()].reset_index(drop=True)

    # Assign the filled column back: a chained ``fillna(..., inplace=True)`` is not guaranteed
    # to update the frame and is a no-op when pandas Copy-on-Write is enabled.
    df_comp["cohabitation_type"] = df_comp["cohabitation_type"].fillna("neutral")
    df_comp = df_comp[["name_ru", "name_ru_x", "cohabitation_type"]].assign(is_compatability=1)

    df_adjacency: pd.DataFrame = nx.to_pandas_edgelist(get_adjacency_graph(species_in_parks, target_parks=target_parks))
    # NOTE(review): the merge matches (source, target) in one orientation only; confirm the
    # adjacency edge list orders each pair the same way as the pairs built above.
    df_comp = df_comp.rename(columns={"name_ru": "source", "name_ru_x": "target"}).merge(
        df_adjacency[["source", "target", "weight"]],
        on=["source", "target"],
        how="left",
    )
    if target_parks is not None:
        df_comp = df_comp[df_comp["source"].isin(df_adjacency["source"].unique())]

    df_comp = df_comp.fillna(no_cases_weight)
    df_comp.loc[df_comp["weight"] != no_cases_weight, "cohabitation_type"] = "has_cases"

    combined_graph = nx.from_pandas_edgelist(
        df_comp,
        "source",
        "target",
        ["weight", "cohabitation_type"],
        create_using=nx.MultiGraph(),
        edge_key="is_compatability",
    )

    # Mapping {name_ru: {attribute: value}} used as node attributes.
    plant_dict = (
        plants[["name_ru", "name_latin", "id", "is_invasive", "plant_type"]].set_index("name_ru").transpose().to_dict()
    )
    nx.set_node_attributes(combined_graph, plant_dict)

    return combined_graph


def write_combined_graph_gexf(
    plants: pd.DataFrame,
    cohabitation_attributes: pd.DataFrame,
    species_in_parks: pd.DataFrame,
    target_parks: list[str] | None = None,
    output_path: str | BytesIO = "combined_graph.gexf",
) -> None:
    """
    Build the combined graph and save it in GEXF format.

    The graph is produced by ``get_combined_graph`` from the first four arguments and
    then written to `output_path`, which is either a file name or a binary file-like
    object.
    """
    graph = get_combined_graph(
        plants,
        cohabitation_attributes,
        species_in_parks,
        target_parks,
    )
    nx.write_gexf(graph, output_path)