Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new readers and schemas for reduced data storage in db #437

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 1 addition & 20 deletions aeon/io/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from dotmap import DotMap

from aeon import util
from aeon.io.api import aeon as aeon_time
from aeon.io.api import chunk, chunk_key
from aeon.io.api import chunk_key
jkbhagatio marked this conversation as resolved.
Show resolved Hide resolved

_SECONDS_PER_TICK = 32e-6
_payloadtypes = {
Expand Down Expand Up @@ -187,24 +186,6 @@ class Encoder(Harp):
def __init__(self, pattern):
super().__init__(pattern, columns=["angle", "intensity"])

def read(self, file, downsample=True):
"""Reads encoder data from the specified Harp binary file.

By default the encoder data is downsampled to 50Hz. Setting downsample to
False or None can be used to force the raw data to be returned.
"""
data = super().read(file)
if downsample is True:
# resample requires a DatetimeIndex so we convert early
data.index = aeon_time(data.index)

first_index = data.first_valid_index()
if first_index is not None:
# since data is absolute angular position we decimate by taking first of each bin
chunk_origin = chunk(first_index)
data = data.resample("20ms", origin=chunk_origin).first()
return data


class Position(Harp):
"""Extract 2D position tracking data for a specific camera.
Expand Down
204 changes: 204 additions & 0 deletions aeon/schema/ingestion_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""Aeon experiment schemas for DataJoint database ingestion."""

from os import PathLike

import pandas as pd
from dotmap import DotMap

import aeon.schema.core as stream
from aeon.io import reader
from aeon.io.api import aeon as aeon_time
from aeon.schema import foraging, octagon, social_01, social_02, social_03
from aeon.schema.foraging import DepletionFunction, Feeder
from aeon.schema.streams import Device, Stream, StreamGroup


# Define new readers
class _Encoder(reader.Encoder):
"""A version of the encoder reader that can downsample the data."""

def read(self, file: PathLike[str], sr_hz: int = 50) -> pd.DataFrame:
"""Reads encoder data from the specified Harp binary file."""
data = super().read(file)
data.index = aeon_time(data.index)
first_index = data.first_valid_index()
if first_index is not None:
data = data.resample(f"{1/sr_hz}S").first() # take first sample in each resampled bin
return data
jkbhagatio marked this conversation as resolved.
Show resolved Hide resolved


class _Video(reader.Video):
"""A version of the video reader that drops columns that can be recreated from data and metadata."""

def read(self, file: PathLike[str], drop_cols=None) -> pd.DataFrame:
"""Reads video metadata from the specified file."""
drop_cols = ["hw_counter", "_frame", "_path", "_epoch"] if drop_cols is None else drop_cols
data = pd.read_csv(file, header=0, names=self._rawcolumns)
data.drop(columns=drop_cols, errors="ignore", inplace=True)
data.set_index("time", inplace=True)
return data
Comment on lines +33 to +39

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description CodeRabbit

In the read method of the _Video class, you are dropping columns from the DataFrame without checking if they exist. This could lead to errors if one or more of the specified columns are not present in the DataFrame. Consider adding a check to ensure that only existing columns are dropped.

-        data.drop(columns=drop_cols, errors="ignore", inplace=True)
+        drop_cols = [col for col in drop_cols if col in data.columns]
+        data.drop(columns=drop_cols, inplace=True)



# Define new streams and stream groups
class Video(Stream):
"""Video frame metadata."""

def __init__(self, pattern):
super().__init__(_Video(f"{pattern}_*"))


class Encoder(Stream):
"""Wheel magnetic encoder data."""

def __init__(self, pattern):
super().__init__(_Encoder(f"{pattern}_90_*"))
Comment on lines +46 to +54

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description CodeRabbit

The __init__ methods in the Video and Encoder classes are hardcoding the pattern for file matching. This reduces the flexibility of these classes and makes them less reusable. Consider passing the pattern as an argument to the __init__ method instead.

-    def __init__(self, pattern):
-        super().__init__(_Video(f"{pattern}_*"))
+    def __init__(self, pattern, file_pattern):
+        super().__init__(_Video(f"{pattern}_{file_pattern}"))

-    def __init__(self, pattern):
-        super().__init__(_Encoder(f"{pattern}_90_*"))
+    def __init__(self, pattern, file_pattern):
+        super().__init__(_Encoder(f"{pattern}_{file_pattern}"))



class Patch(StreamGroup):
"""Data streams for a patch."""

def __init__(self, pattern):
super().__init__(pattern, DepletionFunction, Encoder, Feeder)
jkbhagatio marked this conversation as resolved.
Show resolved Hide resolved


# Define schemas
octagon01 = DotMap(
[
Device("Metadata", stream.Metadata),
Device("CameraTop", Video, stream.Position),
Device("CameraColorTop", Video),
Device("ExperimentalMetadata", stream.SubjectState),
Device("Photodiode", octagon.Photodiode),
Device("OSC", octagon.OSC),
Device("TaskLogic", octagon.TaskLogic),
Device("Wall1", octagon.Wall),
Device("Wall2", octagon.Wall),
Device("Wall3", octagon.Wall),
Device("Wall4", octagon.Wall),
Device("Wall5", octagon.Wall),
Device("Wall6", octagon.Wall),
Device("Wall7", octagon.Wall),
Device("Wall8", octagon.Wall),
]
)

exp01 = DotMap(
[
Device("SessionData", foraging.SessionData),
Device("FrameTop", Video, stream.Position),
Device("FrameEast", Video),
Device("FrameGate", Video),
Device("FrameNorth", Video),
Device("FramePatch1", Video),
Device("FramePatch2", Video),
Device("FrameSouth", Video),
Device("FrameWest", Video),
Device("Patch1", foraging.DepletionFunction, stream.Encoder, foraging.Feeder),
Device("Patch2", foraging.DepletionFunction, stream.Encoder, foraging.Feeder),
]
)

exp02 = DotMap(
[
Device("Metadata", stream.Metadata),
Device("ExperimentalMetadata", stream.Environment, stream.MessageLog),
Device("CameraTop", Video, stream.Position, foraging.Region),
Device("CameraEast", Video),
Device("CameraNest", Video),
Device("CameraNorth", Video),
Device("CameraPatch1", Video),
Device("CameraPatch2", Video),
Device("CameraSouth", Video),
Device("CameraWest", Video),
Device("Nest", foraging.Weight),
Device("Patch1", Patch),
Device("Patch2", Patch),
]
)

social01 = DotMap(
[
Device("Metadata", stream.Metadata),
Device("Environment", social_02.Environment, social_02.SubjectData),
Device("CameraTop", Video, social_01.Pose),
Device("CameraNorth", Video),
Device("CameraSouth", Video),
Device("CameraEast", Video),
Device("CameraWest", Video),
Device("CameraPatch1", Video),
Device("CameraPatch2", Video),
Device("CameraPatch3", Video),
Device("CameraNest", Video),
Device("Nest", social_02.WeightRaw, social_02.WeightFiltered),
Device("Patch1", Patch),
Device("Patch2", Patch),
Device("Patch3", Patch),
Device("RfidGate", social_01.RfidEvents),
Device("RfidNest1", social_01.RfidEvents),
Device("RfidNest2", social_01.RfidEvents),
Device("RfidPatch1", social_01.RfidEvents),
Device("RfidPatch2", social_01.RfidEvents),
Device("RfidPatch3", social_01.RfidEvents),
]
)


social02 = DotMap(
[
Device("Metadata", stream.Metadata),
Device("Environment", social_02.Environment, social_02.SubjectData),
Device("CameraTop", Video, social_02.Pose),
Device("CameraNorth", Video),
Device("CameraSouth", Video),
Device("CameraEast", Video),
Device("CameraWest", Video),
Device("CameraPatch1", Video),
Device("CameraPatch2", Video),
Device("CameraPatch3", Video),
Device("CameraNest", Video),
Device("Nest", social_02.WeightRaw, social_02.WeightFiltered),
Device("Patch1", Patch),
Device("Patch2", Patch),
Device("Patch3", Patch),
Device("GateRfid", social_02.RfidEvents),
Device("NestRfid1", social_02.RfidEvents),
Device("NestRfid2", social_02.RfidEvents),
Device("Patch1Rfid", social_02.RfidEvents),
Device("Patch2Rfid", social_02.RfidEvents),
Device("Patch3Rfid", social_02.RfidEvents),
]
)


social03 = DotMap(
[
Device("Metadata", stream.Metadata),
Device("Environment", social_02.Environment, social_02.SubjectData),
Device("CameraTop", Video, social_03.Pose),
Device("CameraNorth", Video),
Device("CameraSouth", Video),
Device("CameraEast", Video),
Device("CameraWest", Video),
Device("CameraNest", Video),
Device("CameraPatch1", Video),
Device("CameraPatch2", Video),
Device("CameraPatch3", Video),
Device("Nest", social_02.WeightRaw, social_02.WeightFiltered),
Device("Patch1", Patch),
Device("Patch2", Patch),
Device("Patch3", Patch),
Device("PatchDummy1", Patch),
Device("NestRfid1", social_02.RfidEvents),
Device("NestRfid2", social_02.RfidEvents),
Device("GateRfid", social_02.RfidEvents),
Device("GateEastRfid", social_02.RfidEvents),
Device("GateWestRfid", social_02.RfidEvents),
Device("Patch1Rfid", social_02.RfidEvents),
Device("Patch2Rfid", social_02.RfidEvents),
Device("Patch3Rfid", social_02.RfidEvents),
Device("PatchDummy1Rfid", social_02.RfidEvents),
]
)


__all__ = ["octagon01", "exp01", "exp02", "social01", "social02", "social03"]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Image description CodeRabbit

The __all__ variable is used to specify the public interface of a module. However, it's not clear from this code whether all the items listed in __all__ are intended to be part of the public interface. Please ensure that only those items that should be accessible when importing the module using from module import * are included in __all__.

-__all__ = ["octagon01", "exp01", "exp02", "social01", "social02", "social03"]
+__all__ = ["intended_public_interface_item1", "intended_public_interface_item2", ...]

Loading