Skip to content

Commit

Permalink
Fixing dtype issues in network.get (#217)
Browse files Browse the repository at this point in the history
* Fixing dtype issues in network.get, yields tuples of (pop_name, df)
* Update Changelog
* Remove CircuitIds.index_schema, NetworkObject.property_dtypes
* Add entries in notebooks regarding {nodes,edges}.get
* Bump up the major version to 2.0.0
  • Loading branch information
joni-herttuainen authored Oct 17, 2023
1 parent e8a6255 commit 0cc9bf4
Show file tree
Hide file tree
Showing 9 changed files with 532 additions and 280 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Changelog
=========

Version v1.1.0
Version v2.0.0
--------------

New Features
Expand All @@ -21,8 +21,16 @@ Improvements
- Added kwarg: ``raise_missing_property`` to ``NodePopulation.get``
- Undeprecated calling ``Edges.get`` and ``EdgePopulation.get`` with ``properties=None``

Bug Fixes
~~~~~~~~~
- Fixed the `Same property with different dtype` issue with ``nodes.get``, ``edges.get``

Breaking Changes
~~~~~~~~~~~~~~~~
- ``nodes.get`` and ``edges.get`` (and ``network.get``) no longer return a dataframe
- returns a generator yielding tuples of ``(<population_name>, <dataframe>)`` instead
- to get the previous behavior (all in one dataframe): ``pd.concat(df for _, df in circuit.nodes.get(*args, **kwargs))``
- Removed ``Network.property_dtypes``, ``CircuitIds.index_schema``
- ``Circuit.node_sets``, ``Simulation.node_sets`` returns ``NodeSets`` object initialized with empty dict when node sets file is not present
- ``NodeSet.resolved`` is no longer available
- ``FrameReport.node_set`` returns node_set name instead of resolved node set query
Expand Down
8 changes: 0 additions & 8 deletions bluepysnap/circuit_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,6 @@ def __init__(self, index, sort_index=True):
index = index.sortlevel()[0]
self.index = index

@property
def index_schema(self):
"""Return an empty index with the same names and dtypes of the wrapped index."""
# NOTE: Since pandas 2.1.0, the index needs to contain the explicit dtypes. In pd.concat,
# the dtypes of multi-index are coerced to 'object' if any dataframe has indices with
# dtype='object'
return self.index[:0]

@classmethod
def _instance(cls, index, sort_index=True):
"""The instance returned by the functions."""
Expand Down
33 changes: 4 additions & 29 deletions bluepysnap/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,6 @@ def _populations(self):
def population_names(self):
"""Should define all sorted NetworkObjects population names from the Circuit."""

@cached_property
def property_dtypes(self):
"""Returns all the NetworkObjects property dtypes for the Circuit."""

def _update(d, index, value):
if d.setdefault(index, value) != value:
raise BluepySnapError(
f"Same property with different dtype. {index}: {value}!= {d[index]}"
)

res = {}
for pop in self.values():
for varname, dtype in pop.property_dtypes.items():
_update(res, varname, dtype)
return pd.Series(res)

def keys(self):
"""Returns iterator on the NetworkObjectPopulation names.
Expand Down Expand Up @@ -149,7 +133,7 @@ def ids(self, group=None, sample=None, limit=None):

@abc.abstractmethod
def get(self, group=None, properties=None):
"""Returns the properties of the NetworkObject."""
"""Yields the properties of the NetworkObject."""
ids = self.ids(group)
properties = utils.ensure_list(properties)
# We don t convert to set properties itself to keep the column order.
Expand All @@ -159,14 +143,6 @@ def get(self, group=None, properties=None):
if unknown_props:
raise BluepySnapError(f"Unknown properties required: {unknown_props}")

# Retrieve the dtypes of the selected properties.
# However, the int dtype may not be preserved if some values are NaN.
dtypes = {
column: dtype
for column, dtype in self.property_dtypes.items()
if column in properties_set
}
dataframes = [pd.DataFrame(columns=properties, index=ids.index_schema).astype(dtypes)]
for name, pop in sorted(self.items()):
# since ids is sorted, global_pop_ids should be sorted as well
global_pop_ids = ids.filter_population(name)
Expand All @@ -177,10 +153,9 @@ def get(self, group=None, properties=None):
# However, it's a bit more performant than converting the Series to numpy arrays.
pop_df = pd.DataFrame({prop: pop.get(pop_ids, prop) for prop in pop_properties})
pop_df.index = global_pop_ids.index
dataframes.append(pop_df)
res = pd.concat(dataframes)
assert res.index.is_monotonic_increasing, "The index should be already sorted"
return res

# Sort the columns in the given order
yield name, pop_df[[p for p in properties if p in pop_properties]]

@abc.abstractmethod
def __getstate__(self):
Expand Down
Loading

0 comments on commit 0cc9bf4

Please sign in to comment.