From c3569948dafd7e39efae8347c1c0c2ded180c3cc Mon Sep 17 00:00:00 2001 From: Nicholas Landry Date: Mon, 2 Dec 2024 16:21:39 -0500 Subject: [PATCH] Integrate HIF with more of XGI (#613) * move hif functionality to convert * format with isort and black * add docs * add collection handling * add warning * add docstrings * added HIF to `load_xgi_data` * added more close to tests * Update xgi_data.py * Update xgi_data.py * remove other changes * added unit tests * Update xgi_data.py * Update HOW_TO_CONTRIBUTE.md * response to review * Response to review * Update docs.txt * Update release.txt --- HOW_TO_CONTRIBUTE.md | 2 +- docs/source/api/convert.rst | 1 + .../api/convert/xgi.convert.hif_dict.rst | 11 + .../api/readwrite/xgi.readwrite.hif.rst | 2 + docs/source/xgi-data.rst | 4 +- requirements/release.txt | 1 - tests/convert/test_hif_dict.py | 226 ++++++++++++++++++ tests/drawing/test_draw.py | 48 ++-- tests/readwrite/test_hif.py | 63 ++++- xgi/convert/__init__.py | 2 + xgi/convert/hif_dict.py | 159 ++++++++++++ xgi/readwrite/hif.py | 207 +++++++--------- xgi/readwrite/json.py | 3 + xgi/readwrite/xgi_data.py | 8 +- 14 files changed, 573 insertions(+), 164 deletions(-) create mode 100644 docs/source/api/convert/xgi.convert.hif_dict.rst create mode 100644 tests/convert/test_hif_dict.py create mode 100644 xgi/convert/hif_dict.py diff --git a/HOW_TO_CONTRIBUTE.md b/HOW_TO_CONTRIBUTE.md index 613db716..e031fb31 100644 --- a/HOW_TO_CONTRIBUTE.md +++ b/HOW_TO_CONTRIBUTE.md @@ -9,7 +9,7 @@ Please note we have a [code of conduct](/CODE_OF_CONDUCT.md), please follow it i 1. Download the dependencies in the developer [requirements file](/requirements/developer.txt). 2. Add unit tests for features being added or bugs being fixed. 3. Include any new method/function in the corresponding docs file. -4. Run `pytest` to verify all unit tests pass. +4. Run `pytest` to verify all unit tests pass. (To see what lines are covered, read the [`pytest-cov`](https://pytest-cov.readthedocs.io/en/latest/reporting.html) documentation.) 5. [OPTIONAL] Format codebase according to the steps below. 5. Submit Pull Request with a list of changes, links to issues that it addresses (if applicable) 6. You may merge the Pull Request in once you have the sign-off of at least one other developer, or if you do not have permission to do that, you may request the reviewer to merge it for you. diff --git a/docs/source/api/convert.rst b/docs/source/api/convert.rst index c78e1d51..d1565213 100644 --- a/docs/source/api/convert.rst +++ b/docs/source/api/convert.rst @@ -11,6 +11,7 @@ convert package ~xgi.convert.bipartite_graph ~xgi.convert.encapsulation_dag ~xgi.convert.graph + ~xgi.convert.hif_dict ~xgi.convert.higher_order_network ~xgi.convert.hyperedges ~xgi.convert.hypergraph_dict diff --git a/docs/source/api/convert/xgi.convert.hif_dict.rst b/docs/source/api/convert/xgi.convert.hif_dict.rst new file mode 100644 index 00000000..0c835941 --- /dev/null +++ b/docs/source/api/convert/xgi.convert.hif_dict.rst @@ -0,0 +1,11 @@ +xgi.convert.hif_dict +==================== + +.. currentmodule:: xgi.convert.hif_dict + +.. automodule:: xgi.convert.hif_dict + + .. rubric:: Functions + + .. autofunction:: to_hif_dict + .. autofunction:: from_hif_dict \ No newline at end of file diff --git a/docs/source/api/readwrite/xgi.readwrite.hif.rst b/docs/source/api/readwrite/xgi.readwrite.hif.rst index 0f99b9c8..2aa2e7ca 100644 --- a/docs/source/api/readwrite/xgi.readwrite.hif.rst +++ b/docs/source/api/readwrite/xgi.readwrite.hif.rst @@ -8,4 +8,6 @@ xgi.readwrite.hif .. rubric:: Functions .. autofunction:: read_hif + .. autofunction:: read_hif_collection .. autofunction:: write_hif + .. autofunction:: write_hif_collection diff --git a/docs/source/xgi-data.rst b/docs/source/xgi-data.rst index 5b964bdc..1d36ab92 100644 --- a/docs/source/xgi-data.rst +++ b/docs/source/xgi-data.rst @@ -30,7 +30,9 @@ See the `load_bigg_data() documentation `_ documentation on format (1). +All future datasets and updates to current datasets will be stored as format (1). +Format (2) is structured as follows: * :code:`hypergraph-data`: This tag accesses the attributes of the entire hypergraph dataset such as the authors or dataset name. diff --git a/requirements/release.txt b/requirements/release.txt index acb0418f..4e41020a 100644 --- a/requirements/release.txt +++ b/requirements/release.txt @@ -3,4 +3,3 @@ twine>=3.4 build>=1.2.1 wheel>=0.36 -github-changelog diff --git a/tests/convert/test_hif_dict.py b/tests/convert/test_hif_dict.py new file mode 100644 index 00000000..b2b263da --- /dev/null +++ b/tests/convert/test_hif_dict.py @@ -0,0 +1,226 @@ +import pytest + +import xgi + + +def test_to_hif_dict( + edgelist1, + hyperwithdupsandattrs, + simplicialcomplex1, + diedgedict1, + dihyperwithattrs, +): + H = xgi.Hypergraph(edgelist1) + d = xgi.to_hif_dict(H) + + assert "nodes" not in d + assert "edges" not in d + assert "incidences" in d + assert "network-type" in d + assert d["network-type"] == "undirected" + + incidences = [ + {"edge": 0, "node": 1}, + {"edge": 0, "node": 2}, + {"edge": 0, "node": 3}, + {"edge": 1, "node": 4}, + {"edge": 2, "node": 5}, + {"edge": 2, "node": 6}, + {"edge": 3, "node": 6}, + {"edge": 3, "node": 7}, + {"edge": 3, "node": 8}, + ] + assert sorted(d["incidences"], key=lambda x: (x["edge"], x["node"])) == incidences + + # hypergraph with attributes + hyperwithdupsandattrs["name"] = "test" + d = xgi.to_hif_dict(hyperwithdupsandattrs) + + assert "nodes" in d + assert "edges" in d + assert "incidences" in d + assert "network-type" in d + assert d["network-type"] == "undirected" + assert "metadata" in d + assert d["metadata"] == {"name": "test"} + + nodes = [ + {"node": 1, "attrs": {"color": "red", "name": "horse"}}, + {"node": 2, "attrs": {"color": "blue", "name": "pony"}}, + {"node": 3, "attrs": {"color": "yellow", "name": "zebra"}}, + {"node": 4, "attrs": {"color": "red", "name": "orangutan", "age": 20}}, + {"node": 5, "attrs": {"color": "blue", "name": "fish", "age": 2}}, + ] + + edges = [ + {"edge": 0, "attrs": {"color": "blue"}}, + {"edge": 1, "attrs": {"color": "red", "weight": 2}}, + {"edge": 2, "attrs": {"color": "yellow"}}, + {"edge": 3, "attrs": {"color": "purple"}}, + {"edge": 4, "attrs": {"color": "purple", "name": "test"}}, + ] + + incidences = [ + {"edge": 0, "node": 1}, + {"edge": 0, "node": 2}, + {"edge": 1, "node": 1}, + {"edge": 1, "node": 2}, + {"edge": 2, "node": 1}, + {"edge": 2, "node": 2}, + {"edge": 3, "node": 3}, + {"edge": 3, "node": 4}, + {"edge": 3, "node": 5}, + {"edge": 4, "node": 3}, + {"edge": 4, "node": 4}, + {"edge": 4, "node": 5}, + ] + + assert sorted(d["nodes"], key=lambda x: x["node"]) == nodes + assert d["edges"] == edges + assert sorted(d["incidences"], key=lambda x: (x["edge"], x["node"])) == incidences + + # Simplicial complexes + d = xgi.to_hif_dict(simplicialcomplex1) + + assert "nodes" not in d + assert "edges" not in d + assert "incidences" in d + assert "network-type" in d + assert d["network-type"] == "asc" + + incidences = [ + {"edge": "e1", "node": 0}, + {"edge": "e1", "node": "b"}, + {"edge": "e2", "node": 0}, + {"edge": "e2", "node": "c"}, + {"edge": "e3", "node": 0}, + {"edge": "e3", "node": "b"}, + {"edge": "e3", "node": "c"}, + {"edge": "e4", "node": "b"}, + {"edge": "e4", "node": "c"}, + ] + + def _mixed(ele): + return (0, int(ele)) if isinstance(ele, int) else (1, ele) + + sorted_incidences = sorted( + d["incidences"], key=lambda x: (_mixed(x["edge"]), _mixed(x["node"])) + ) + assert sorted_incidences == incidences + + # dihypergraphs without attributes + H = xgi.DiHypergraph(diedgedict1) + + d = xgi.to_hif_dict(H) + + assert "nodes" not in d + assert "edges" not in d + assert "incidences" in d + + # dihypergraphs with attributes + d = xgi.to_hif_dict(dihyperwithattrs) + + assert "nodes" in d + assert "edges" in d + assert "incidences" in d + + +def test_from_hif_dict( + hyperwithdupsandattrs, + simplicialcomplex1, + dihyperwithattrs, +): + d = xgi.to_hif_dict(hyperwithdupsandattrs) + + # test basic import + H = xgi.from_hif_dict(d) + + assert isinstance(H, xgi.Hypergraph) + assert (H.num_nodes, H.num_edges) == (5, 5) + assert set(H.nodes) == {1, 2, 3, 4, 5} + assert H.nodes[1] == {"color": "red", "name": "horse"} + assert H.nodes[2] == {"color": "blue", "name": "pony"} + assert H.nodes[3] == {"color": "yellow", "name": "zebra"} + assert H.nodes[4] == {"color": "red", "name": "orangutan", "age": 20} + assert H.nodes[5] == {"color": "blue", "name": "fish", "age": 2} + + assert set(H.edges) == {0, 1, 2, 3, 4} + assert H.edges[0] == {"color": "blue"} + assert H.edges[1] == {"color": "red", "weight": 2} + assert H.edges[2] == {"color": "yellow"} + assert H.edges[3] == {"color": "purple"} + assert H.edges[4] == {"color": "purple", "name": "test"} + + edgedict = {0: {1, 2}, 1: {1, 2}, 2: {1, 2}, 3: {3, 4, 5}, 4: {3, 4, 5}} + assert H.edges.members(dtype=dict) == edgedict + + # cast nodes and edges + H = xgi.from_hif_dict(d, nodetype=str, edgetype=float) + assert set(H.nodes) == {"1", "2", "3", "4", "5"} + assert set(H.edges) == {0.0, 1.0, 2.0, 3.0, 4.0} + + assert H.nodes["1"] == {"color": "red", "name": "horse"} + assert H.edges[0.0] == {"color": "blue"} + + edgedict = { + 0: {"1", "2"}, + 1: {"1", "2"}, + 2: {"1", "2"}, + 3: {"3", "4", "5"}, + 4: {"3", "4", "5"}, + } + assert H.edges.members(dtype=dict) == edgedict + + ds = xgi.to_hif_dict(simplicialcomplex1) + + S = xgi.from_hif_dict(ds) + assert isinstance(S, xgi.SimplicialComplex) + assert (S.num_nodes, S.num_edges) == (3, 4) + + assert set(S.nodes) == {0, "b", "c"} + assert set(S.edges) == {"e1", "e2", "e3", "e4"} + + # dihypergraphs + d = xgi.to_hif_dict(dihyperwithattrs) + + DH = xgi.from_hif_dict(d) + assert (DH.num_nodes, DH.num_edges) == (6, 3) + assert isinstance(DH, xgi.DiHypergraph) + assert set(DH.nodes) == {0, 1, 2, 3, 4, 5} + assert set(DH.edges) == {0, 1, 2} + + edgedict = {0: ({0, 1}, {2}), 1: ({1, 2}, {4}), 2: ({2, 3, 4}, {4, 5})} + assert DH.edges.dimembers(dtype=dict) == edgedict + + # test error checking + with pytest.raises(TypeError): + S = xgi.from_hif_dict(ds, edgetype=int) + + # metadata + hyperwithdupsandattrs["name"] = "test" + d = xgi.to_hif_dict(hyperwithdupsandattrs) + H = xgi.from_hif_dict(d) + + assert H["name"] == "test" + + # test isolates and empty edges + H = xgi.Hypergraph() + H.add_nodes_from(range(5)) + H.add_edges_from([[1, 2, 3], []]) + + d = xgi.to_hif_dict(H) + + H = xgi.from_hif_dict(d) + assert H.edges.size.aslist() == [3, 0] + assert set(H.nodes.isolates()) == {0, 4} + + H = xgi.DiHypergraph() + H.add_nodes_from(range(5)) + H.add_edges_from([([1, 2, 3], [2, 4]), [[], []]]) + + d = xgi.to_hif_dict(H) + + H = xgi.from_hif_dict(d) + + assert H.edges.size.aslist() == [4, 0] + assert set(H.nodes.isolates()) == {0} diff --git a/tests/drawing/test_draw.py b/tests/drawing/test_draw.py index 24c301a4..a2f695be 100644 --- a/tests/drawing/test_draw.py +++ b/tests/drawing/test_draw.py @@ -33,7 +33,7 @@ def test_draw(edgelist8): assert patch.get_zorder() == z assert node_collection.get_zorder() == 4 # nodes - plt.close() + plt.close("all") # simplicial complex S = xgi.SimplicialComplex(edgelist8) @@ -58,7 +58,7 @@ def test_draw(edgelist8): for patch, z in zip(ax.patches, [0, 2, 2]): # hyperedges assert patch.get_zorder() == z - plt.close() + plt.close("all") def test_draw_nodes(edgelist8): @@ -116,10 +116,10 @@ def test_draw_nodes(edgelist8): # negative node_lw or node_size with pytest.raises(ValueError): ax3, node_collection3 = xgi.draw_nodes(H, node_size=-1) - plt.close() + plt.close("all") with pytest.raises(ValueError): ax3, node_collection3 = xgi.draw_nodes(H, node_lw=-1) - plt.close() + plt.close("all") plt.close("all") @@ -132,14 +132,14 @@ def test_draw_nodes_fc_cmap(edgelist8): fig, ax = plt.subplots() ax, node_collection = xgi.draw_nodes(H, ax=ax, node_fc="r") assert node_collection.get_cmap() == plt.cm.viridis - plt.close() + plt.close("all") # default cmap fig, ax = plt.subplots() colors = [11, 12, 14, 16, 17, 19, 21] ax, node_collection = xgi.draw_nodes(H, ax=ax, node_fc=colors) assert node_collection.get_cmap() == plt.cm.Reds - plt.close() + plt.close("all") # set cmap fig, ax = plt.subplots() @@ -148,13 +148,13 @@ def test_draw_nodes_fc_cmap(edgelist8): ) assert node_collection.get_cmap() == plt.cm.Greens assert (min(colors), max(colors)) == node_collection.get_clim() - plt.close() + plt.close("all") # vmin/vmax fig, ax = plt.subplots() ax, node_collection = xgi.draw_nodes(H, ax=ax, node_fc=colors, vmin=14, vmax=19) assert (14, 19) == node_collection.get_clim() - plt.close() + plt.close("all") def test_draw_nodes_interp(edgelist8): @@ -168,7 +168,7 @@ def test_draw_nodes_interp(edgelist8): ax, node_collection = xgi.draw_nodes(H, ax=ax, node_size=1, node_lw=10) assert np.all(node_collection.get_sizes() == np.array([1])) assert np.all(node_collection.get_linewidth() == np.array([10])) - plt.close() + plt.close("all") # rescaling does not affect scalars fig, ax = plt.subplots() @@ -177,7 +177,7 @@ def test_draw_nodes_interp(edgelist8): ) assert np.all(node_collection.get_sizes() == np.array([1])) assert np.all(node_collection.get_linewidth() == np.array([10])) - plt.close() + plt.close("all") # not rescaling IDStat fig, ax = plt.subplots() @@ -186,7 +186,7 @@ def test_draw_nodes_interp(edgelist8): ) assert np.all(node_collection.get_sizes() == deg_arr**2) assert np.all(node_collection.get_linewidth() == deg_arr) - plt.close() + plt.close("all") # rescaling IDStat fig, ax = plt.subplots() @@ -197,7 +197,7 @@ def test_draw_nodes_interp(edgelist8): assert max(node_collection.get_sizes()) == 30**2 assert min(node_collection.get_linewidth()) == 0 assert max(node_collection.get_linewidth()) == 5 - plt.close() + plt.close("all") # rescaling IDStat with manual values fig, ax = plt.subplots() @@ -218,7 +218,7 @@ def test_draw_nodes_interp(edgelist8): assert max(node_collection.get_sizes()) == 20**2 assert min(node_collection.get_linewidth()) == 1 assert max(node_collection.get_linewidth()) == 10 - plt.close() + plt.close("all") # rescaling ndarray fig, ax = plt.subplots() @@ -229,7 +229,7 @@ def test_draw_nodes_interp(edgelist8): assert max(node_collection.get_sizes()) == 30**2 assert min(node_collection.get_linewidth()) == 0 assert max(node_collection.get_linewidth()) == 5 - plt.close() + plt.close("all") def test_draw_hyperedges(edgelist8): @@ -281,7 +281,7 @@ def test_draw_hyperedges(edgelist8): with pytest.raises(ValueError): ax, collections = xgi.draw_hyperedges(H, ax=ax, dyad_lw=-1) (dyad_collection, edge_collection) = collections - plt.close() + plt.close("all") plt.close("all") @@ -296,7 +296,7 @@ def test_draw_hyperedges_fc_cmap(edgelist8): (dyad_collection, edge_collection) = collections assert dyad_collection.get_cmap() == plt.cm.Greys assert edge_collection.get_cmap() == sb.color_palette("crest_r", as_cmap=True) - plt.close() + plt.close("all") # set cmap fig, ax = plt.subplots() @@ -313,7 +313,7 @@ def test_draw_hyperedges_fc_cmap(edgelist8): assert (min(dyad_colors), max(dyad_colors)) == dyad_collection.get_clim() assert (3, 5) == edge_collection.get_clim() - plt.close() + plt.close("all") # vmin/vmax fig, ax = plt.subplots() @@ -332,7 +332,7 @@ def test_draw_hyperedges_fc_cmap(edgelist8): assert (14, 19) == edge_collection.get_clim() assert (5, 6) == dyad_collection.get_clim() - plt.close() + plt.close("all") def test_draw_hyperedges_ec(edgelist8): @@ -364,7 +364,7 @@ def test_draw_simplices(edgelist8): with pytest.raises(XGIError): H = xgi.Hypergraph(edgelist8) ax = xgi.draw_simplices(H) - plt.close() + plt.close("all") S = xgi.SimplicialComplex(edgelist8) @@ -386,7 +386,7 @@ def test_draw_simplices(edgelist8): for patch, z in zip(ax.patches, [0, 2, 2]): # hyperedges assert patch.get_zorder() == z - plt.close() + plt.close("all") def test_draw_hypergraph_hull(edgelist8): @@ -414,7 +414,7 @@ def test_draw_hypergraph_hull(edgelist8): assert patch.get_zorder() == z assert node_collection.get_zorder() == 4 # nodes - plt.close() + plt.close("all") def test_draw_multilayer(edgelist8): @@ -461,7 +461,7 @@ def test_draw_multilayer(edgelist8): # node_size assert np.all(node_coll.get_sizes() == np.array([5**2])) - plt.close() + plt.close("all") # max_order parameter max_order = 2 @@ -484,7 +484,7 @@ def test_draw_multilayer(edgelist8): offsets = node_coll2.get_offsets() assert offsets.shape[0] == H.num_nodes # nodes - plt.close() + plt.close("all") # conn_lines parameter ax3, (node_coll3, edge_coll3) = xgi.draw_multilayer(H, conn_lines=False) @@ -503,7 +503,7 @@ def test_draw_multilayer(edgelist8): + num_dyad_collections == len(ax3.collections) ) - plt.close() + plt.close("all") # custom parameters pos = xgi.circular_layout(H) diff --git a/tests/readwrite/test_hif.py b/tests/readwrite/test_hif.py index 62ae6173..b12b280b 100644 --- a/tests/readwrite/test_hif.py +++ b/tests/readwrite/test_hif.py @@ -1,12 +1,13 @@ import json import tempfile +from os.path import join import pytest import xgi -def test_to_hif( +def test_write_hif( edgelist1, hyperwithdupsandattrs, simplicialcomplex1, @@ -153,7 +154,7 @@ def _mixed(ele): assert "incidences" in jsondata -def test_from_hif( +def test_read_hif( hyperwithdupsandattrs, simplicialcomplex1, dihyperwithattrs, @@ -247,13 +248,57 @@ def test_from_hif( assert H.edges.size.aslist() == [3, 0] assert set(H.nodes.isolates()) == {0, 4} - H = xgi.DiHypergraph() - H.add_nodes_from(range(5)) - H.add_edges_from([([1, 2, 3], [2, 4]), [[], []]]) + DH = xgi.DiHypergraph() + DH.add_nodes_from(range(5)) + DH.add_edges_from([([1, 2, 3], [2, 4]), [[], []]]) _, filename6 = tempfile.mkstemp() - xgi.write_hif(H, filename6) + xgi.write_hif(DH, filename6) + + DH = xgi.read_hif(filename6) + + assert DH.edges.size.aslist() == [4, 0] + assert set(DH.nodes.isolates()) == {0} + + +def test_read_hif_collection(): + # test isolates and empty edges + H = xgi.Hypergraph() + H.add_nodes_from(range(5)) + H.add_edges_from([[1, 2, 3], []]) + + _, filename5 = tempfile.mkstemp() + xgi.write_hif(H, filename5) + + H = xgi.read_hif(filename5) + assert H.edges.size.aslist() == [3, 0] + assert set(H.nodes.isolates()) == {0, 4} + + DH = xgi.DiHypergraph() + DH.add_nodes_from(range(5)) + DH.add_edges_from([([1, 2, 3], [2, 4]), [[], []]]) + # test collections - H = xgi.read_hif(filename6) - # assert H.edges.size.aslist() == [5, 0] - # assert set(H.nodes.isolates()) == {0} + # test list collection + collection = [H, DH] + tempdir = tempfile.mkdtemp() + + xgi.write_hif_collection(collection, tempdir, collection_name="test") + collection = xgi.read_hif_collection( + join(tempdir, "test_collection_information.json") + ) + assert len(collection) == 2 + assert isinstance(collection, dict) + assert sorted(collection) == ["0", "1"] + + # test dict collection + collection = {"dataset1": H, "dataset2": DH} + tempdir = tempfile.mkdtemp() + + xgi.write_hif_collection(collection, tempdir, collection_name="test") + collection = xgi.read_hif_collection( + join(tempdir, "test_collection_information.json") + ) + assert len(collection) == 2 + assert isinstance(collection, dict) + assert sorted(collection) == ["dataset1", "dataset2"] diff --git a/xgi/convert/__init__.py b/xgi/convert/__init__.py index 84340a08..8a5f38bf 100644 --- a/xgi/convert/__init__.py +++ b/xgi/convert/__init__.py @@ -3,6 +3,7 @@ bipartite_graph, encapsulation_dag, graph, + hif_dict, higher_order_network, hyperedges, hypergraph_dict, @@ -15,6 +16,7 @@ from .bipartite_graph import * from .encapsulation_dag import * from .graph import * +from .hif_dict import * from .higher_order_network import * from .hyperedges import * from .hypergraph_dict import * diff --git a/xgi/convert/hif_dict.py b/xgi/convert/hif_dict.py new file mode 100644 index 00000000..0d288fb3 --- /dev/null +++ b/xgi/convert/hif_dict.py @@ -0,0 +1,159 @@ +"""Methods for converting to/from HIF standard.""" + +from collections import defaultdict + +from ..core import DiHypergraph, Hypergraph, SimplicialComplex +from ..utils import IDDict +from .bipartite_edges import to_bipartite_edgelist + +__all__ = ["to_hif_dict", "from_hif_dict"] + + +def to_hif_dict(H): + """ + A function to create a dictionary according to the HIF standard from a higher-order network. + + For more information, see the HIF `project `_. + + Parameters + ---------- + H: Hypergraph, DiHypergraph, or SimplicialComplex object + The specified higher-order network + + Returns + ------- + defaultdict + A dict according to the HIF standard. + """ + data = defaultdict(list) + + data["metadata"] = {} + data["metadata"].update(H._net_attr) + + if isinstance(H, SimplicialComplex): + data["network-type"] = "asc" + elif isinstance(H, Hypergraph): + data["network-type"] = "undirected" + elif isinstance(H, DiHypergraph): + data["network-type"] = "directed" + + # get node data + isolates = set(H.nodes.isolates()) + nodes_with_attrs = set(n for n in H.nodes if H.nodes[n]) + for n in isolates.union(nodes_with_attrs): + attr = {"attrs": H.nodes[n]} if H.nodes[n] else {} + data["nodes"].append(IDDict({"node": n}) + attr) + + empty = set(H.edges.empty()) + edges_with_attrs = set(e for e in H.edges if H.edges[e]) + for e in empty.union(edges_with_attrs): + attr = {"attrs": H.edges[e]} if H.edges[e] else {} + data["edges"].append(IDDict({"edge": e}) + attr) + + # hyperedge dict + if data["network-type"] == "directed": + _convert_d = lambda d: "tail" if d == "in" else "head" + data["incidences"] = [ + IDDict({"edge": e, "node": n, "direction": _convert_d(d)}) + for n, e, d in to_bipartite_edgelist(H) + ] + elif data["network-type"] in {"undirected", "asc"}: + data["incidences"] = [ + IDDict({"edge": e, "node": n}) for n, e in to_bipartite_edgelist(H) + ] + return data + + +def from_hif_dict(data, nodetype=None, edgetype=None): + """ + A function to read a dictionary that follows the HIF standard. + + For more information, see the HIF `project `_. + + Parameters + ---------- + data: dict + A dictionary in the hypergraph JSON format + nodetype: type, optional + Type that the node IDs will be cast to + edgetype: type, optional + Type that the edge IDs will be cast to + + Returns + ------- + A Hypergraph, SimplicialComplex, or DiHypergraph object + The loaded network + """ + + def _empty_edge(network_type): + if network_type in {"asc", "undirected"}: + return set() + else: + return (set(), set()) + + def _convert_id(i, idtype): + if idtype: + try: + return idtype(i) + except ValueError as e: + raise TypeError(f"Failed to convert ID {i} to type {idtype}.") from e + else: + return i + + _convert_d = lambda d: "in" if d == "tail" else "out" + + if "network-type" in data: + network_type = data["network-type"] + else: + network_type = "undirected" + + if network_type in {"asc", "undirected"}: + G = Hypergraph() + elif network_type == "directed": + G = DiHypergraph() + + # Import network metadata + if "metadata" in data: + G._net_attr.update(data["metadata"]) + + for record in data["incidences"]: + n = _convert_id(record["node"], nodetype) + e = _convert_id(record["edge"], edgetype) + + if network_type == "directed": + d = record["direction"] + d = _convert_d(d) # convert from head/tail to in/out + G.add_node_to_edge(e, n, d) + else: + G.add_node_to_edge(e, n) + + # import node attributes if they exist + if "nodes" in data: + for record in data["nodes"]: + n = _convert_id(record["node"], nodetype) + if "attrs" in record: + attr = record["attrs"] + else: + attr = {} + + if n not in G._node: + G.add_node(n, **attr) + else: + G.set_node_attributes({n: attr}) + + # import edge attributes if they exist + if "edges" in data: + for record in data["edges"]: + e = _convert_id(record["edge"], edgetype) + if "attrs" in record: + attr = record["attrs"] + else: + attr = {} + if e not in G._edge: + G.add_edge(_empty_edge(network_type), e, **attr) + else: + G.set_edge_attributes({e: attr}) + + if network_type == "asc": + G = SimplicialComplex(G) + return G diff --git a/xgi/readwrite/hif.py b/xgi/readwrite/hif.py index 9d00ee1f..1b8dec9c 100644 --- a/xgi/readwrite/hif.py +++ b/xgi/readwrite/hif.py @@ -6,15 +6,15 @@ import json from collections import defaultdict +from os.path import dirname, join -from ..convert import to_bipartite_edgelist -from ..core import DiHypergraph, Hypergraph, SimplicialComplex -from ..utils import IDDict +from ..convert import from_hif_dict, to_hif_dict +from ..exception import XGIError -__all__ = ["write_hif", "read_hif"] +__all__ = ["write_hif", "write_hif_collection", "read_hif", "read_hif_collection"] -def write_hif(G, path): +def write_hif(H, path): """ A function to write a higher-order network according to the HIF standard. @@ -22,48 +22,12 @@ def write_hif(G, path): Parameters ---------- - G: Hypergraph, DiHypergraph, or SimplicialComplex object + H: Hypergraph, DiHypergraph, or SimplicialComplex object The specified higher-order network path: string The path of the file to read from """ - # initialize empty data - data = defaultdict(list) - - data["metadata"] = {} - data["metadata"].update(G._net_attr) - - if isinstance(G, SimplicialComplex): - data["network-type"] = "asc" - elif isinstance(G, Hypergraph): - data["network-type"] = "undirected" - elif isinstance(G, DiHypergraph): - data["network-type"] = "directed" - - # get node data - isolates = set(G.nodes.isolates()) - nodes_with_attrs = set(n for n in G.nodes if G.nodes[n]) - for n in isolates.union(nodes_with_attrs): - attr = {"attrs": G.nodes[n]} if G.nodes[n] else {} - data["nodes"].append(IDDict({"node": n}) + attr) - - empty = set(G.edges.empty()) - edges_with_attrs = set(e for e in G.edges if G.edges[e]) - for e in empty.union(edges_with_attrs): - attr = {"attrs": G.edges[e]} if G.edges[e] else {} - data["edges"].append(IDDict({"edge": e}) + attr) - - # hyperedge dict - if data["network-type"] == "directed": - _convert_d = lambda d: "tail" if d == "in" else "head" - data["incidences"] = [ - IDDict({"edge": e, "node": n, "direction": _convert_d(d)}) - for n, e, d in to_bipartite_edgelist(G) - ] - elif data["network-type"] in {"undirected", "asc"}: - data["incidences"] = [ - IDDict({"edge": e, "node": n}) for n, e in to_bipartite_edgelist(G) - ] + data = to_hif_dict(H) datastring = json.dumps(data, indent=2) @@ -71,6 +35,50 @@ def write_hif(G, path): output_file.write(datastring) +def write_hif_collection(H, path, collection_name=""): + """ + A function to write a collection of higher-order network according to the HIF standard. + + For more information, see the HIF `project `_. + + Parameters + ---------- + H: list or dict of Hypergraph, DiHypergraph, or SimplicialComplex objects + The specified higher-order network + path: string + The path of the file to read from + """ + if isinstance(H, list): + collection_data = defaultdict(dict) + for i, H in enumerate(H): + fname = f"{path}/{collection_name}_{i}.json" + collection_data["datasets"][i] = { + "relative-path": f"{collection_name}_{i}.json" + } + write_hif(H, fname) + collection_data["type"] = "collection" + datastring = json.dumps(collection_data, indent=2) + with open( + f"{path}/{collection_name}_collection_information.json", "w" + ) as output_file: + output_file.write(datastring) + + elif isinstance(H, dict): + collection_data = defaultdict(dict) + for name, H in H.items(): + fname = f"{path}/{collection_name}_{name}.json" + collection_data["datasets"][name] = { + "relative-path": f"{collection_name}_{name}.json" + } + write_hif(H, fname) + collection_data["type"] = "collection" + datastring = json.dumps(collection_data, indent=2) + with open( + f"{path}/{collection_name}_collection_information.json", "w" + ) as output_file: + output_file.write(datastring) + + def read_hif(path, nodetype=None, edgetype=None): """ A function to read a file created according to the HIF format. @@ -79,8 +87,8 @@ def read_hif(path, nodetype=None, edgetype=None): Parameters ---------- - data: dict - A dictionary in the hypergraph JSON format + path: str + The path to the json file nodetype: type, optional type that the node IDs will be cast to edgetype: type, optional @@ -94,99 +102,44 @@ def read_hif(path, nodetype=None, edgetype=None): with open(path) as file: data = json.loads(file.read()) - return _from_dict(data, nodetype=nodetype, edgetype=edgetype) + return from_hif_dict(data, nodetype=nodetype, edgetype=edgetype) -def _from_dict(data, nodetype=None, edgetype=None): +def read_hif_collection(path, nodetype=None, edgetype=None): """ - A helper function to read a file created according to the HIF format. + A function to read a collection of files created according to the HIF format. + + There must be a collection information JSON file which has a top-level field "datasets" + with subfields "relative-path", indicating each dataset's location relative to the + collection file For more information, see the HIF `project `_. Parameters ---------- - data: dict - A dictionary in the hypergraph JSON format + path: str + A path to the collection json file. nodetype: type, optional - Type that the node IDs will be cast to + type that the node IDs will be cast to edgetype: type, optional - Type that the edge IDs will be cast to + type that the edge IDs will be cast to Returns ------- - A Hypergraph, SimplicialComplex, or DiHypergraph object - The loaded network + A dictionary of Hypergraph, SimplicialComplex, or DiHypergraph objects + The collection of networks """ - - def _empty_edge(network_type): - if network_type in {"asc", "undirected"}: - return set() - else: - return (set(), set()) - - def _convert_id(i, idtype): - if idtype: - try: - return idtype(i) - except ValueError as e: - raise TypeError(f"Failed to convert ID {i} to type {idtype}.") from e - else: - return i - - _convert_d = lambda d: "in" if d == "tail" else "out" - - if "network-type" in data: - network_type = data["network-type"] - else: - network_type = "undirected" - - if network_type in {"asc", "undirected"}: - G = Hypergraph() - elif network_type == "directed": - G = DiHypergraph() - - # Import network metadata - if "metadata" in data: - G._net_attr.update(data["metadata"]) - - for record in data["incidences"]: - n = _convert_id(record["node"], nodetype) - e = _convert_id(record["edge"], edgetype) - - if network_type == "directed": - d = record["direction"] - d = _convert_d(d) # convert from head/tail to in/out - G.add_node_to_edge(e, n, d) - else: - G.add_node_to_edge(e, n) - - # import node attributes if they exist - if "nodes" in data: - for record in data["nodes"]: - n = _convert_id(record["node"], nodetype) - if "attrs" in record: - attr = record["attrs"] - else: - attr = {} - - if n not in G._node: - G.add_node(n, **attr) - else: - G.set_node_attributes({n: attr}) - - # import edge attributes if they exist - if "edges" in data: - for record in data["edges"]: - e = _convert_id(record["edge"], edgetype) - if "attrs" in record: - attr = record["attrs"] - else: - attr = {} - if e not in G._edge: - G.add_edge(_empty_edge(network_type), e, **attr) - else: - G.set_edge_attributes({e: attr}) - - if network_type == "asc": - G = SimplicialComplex(G) - return G + with open(path) as file: + jsondata = json.loads(file.read()) + + try: + collection = {} + for name, data in jsondata["datasets"].items(): + relpath = data["relative-path"] + H = read_hif( + join(dirname(path), relpath), nodetype=nodetype, edgetype=edgetype + ) + collection[name] = H + return collection + except KeyError: + raise XGIError("Data collection is in the wrong format!") diff --git a/xgi/readwrite/json.py b/xgi/readwrite/json.py index 8de683c6..3493aac9 100644 --- a/xgi/readwrite/json.py +++ b/xgi/readwrite/json.py @@ -3,6 +3,7 @@ import json from collections import defaultdict from os.path import dirname, join +from warnings import warn from ..convert import from_hypergraph_dict, to_hypergraph_dict from ..core import Hypergraph, SimplicialComplex @@ -36,6 +37,7 @@ def write_json(H, path, collection_name=""): to strings, e.g., node IDs "2" and 2. """ + warn("This function is deprecated in favor of the 'write_hif()' function") if collection_name: collection_name += "_" @@ -101,6 +103,7 @@ def read_json(path, nodetype=None, edgetype=None): If the JSON is not in a format that can be loaded. """ + warn("This function is deprecated in favor of the 'read_hif()' function") with open(path) as file: jsondata = json.loads(file.read()) diff --git a/xgi/readwrite/xgi_data.py b/xgi/readwrite/xgi_data.py index 4fe2f9d2..e2922525 100644 --- a/xgi/readwrite/xgi_data.py +++ b/xgi/readwrite/xgi_data.py @@ -3,7 +3,7 @@ from os.path import dirname, exists, join from warnings import warn -from ..convert import from_hypergraph_dict +from ..convert import cut_to_order, from_hif_dict, from_hypergraph_dict from ..exception import XGIError from ..utils import request_json_from_url, request_json_from_url_cached @@ -164,6 +164,12 @@ def _request_from_xgi_data( else: jsondata = request_json_from_url(url) + if "incidences" in jsondata: + H = from_hif_dict(H, nodetype=nodetype, edgetype=edgetype) + if max_order: + H = cut_to_order(H, order=max_order) + return H + if "type" in jsondata and jsondata["type"] == "collection": collection = {} for name, data in jsondata["datasets"].items():