Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neurophotometrics dev #42

Merged
merged 2 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 75 additions & 9 deletions src/iblphotometry/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@
def from_raw_neurophotometrics_file_to_raw_df(
path: str | Path, validate=True, version='new'
) -> pd.DataFrame:
"""reads in a file as generated by the neurophotometrics FP3002 (both new and old versions) with validation

Args:
path (str | Path): path to the file, can be in either .csv or .pqt format
validate (bool, optional): If True, validates the file. Defaults to True.
version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.

Returns:
pd.DataFrame: the data as a raw dataframe format
"""
path = Path(path) if isinstance(path, str) else path
match path.suffix:
case '.csv':
Expand All @@ -30,6 +40,17 @@ def from_raw_neurophotometrics_file_to_raw_df(
def from_raw_neurophotometrics_df_to_ibl_df(
raw_df: pd.DataFrame, rois=None, drop_first=True
) -> pd.DataFrame:
"""reads in a dataframe with the raw photometry data as generated by the neurophotometrics FP3002 into the ibl photometry dataformat.


Args:
raw_df (pd.DataFrame): as returned by `from_raw_neurophotometrics_file_to_raw_df`
rois (_type_, optional): names of the rois as selected by the user in the acquisition UI. If None, the names are inferred from the data. Defaults to None.
drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.

Returns:
pd.DataFrame: the data in the ibl photometry data format
"""
if rois is None:
rois = infer_data_columns(raw_df)

Expand Down Expand Up @@ -79,6 +100,17 @@ def from_raw_neurophotometrics_df_to_ibl_df(
def from_raw_neurophotometrics_file_to_ibl_df(
path: str | Path, drop_first=True, validate=True, version='new'
) -> pd.DataFrame:
"""convenience function that chains `from_raw_neurophotometrics_file_to_raw_df` and `from_raw_neurophotometrics_df_to_ibl_df`. See docstrings

Args:
path (str | Path): path to the file, can be in either .csv or .pqt format
drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
validate (bool, optional): If True, validates the file. Defaults to True.
version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.

Returns:
pd.DataFrame: the data in the ibl photometry data format
"""
raw_df = from_raw_neurophotometrics_file_to_raw_df(
path, validate=validate, version=version
)
Expand All @@ -88,10 +120,10 @@ def from_raw_neurophotometrics_file_to_ibl_df(


def from_ibl_pqt_to_ibl_df(path: str | Path, validate=False):
    """reads in a .pqt file that stores photometry data in the ibl format

    Args:
        path (str | Path): path to the .pqt file, passed to `pd.read_parquet`
        validate (bool, optional): If True, the dataframe is passed through `validate_ibl_dataframe`. Defaults to False.

    Returns:
        pd.DataFrame: the dataframe as read from the file (the output of `validate_ibl_dataframe` if validate is True)
    """
    ibl_df = pd.read_parquet(path)
    # only an explicit `True` triggers validation
    if validate is True:
        ibl_df = validate_ibl_dataframe(ibl_df)
    return ibl_df


def from_ibl_dataframe(
Expand All @@ -101,13 +133,14 @@ def from_ibl_dataframe(
channel_column: str = 'name',
channel_names: list[str] | None = None,
rename: dict | None = None,
validate: bool = True,
) -> dict:
"""main function to convert to analysis ready format


Args:
ibl_df (pd.DataFrame): the dataframe, as stored in the photometry.signal.pqt
data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, All columns that start with `Region` are treated as data columns. Defaults to None.
data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, All columns that start with `Region` or `G` are treated as data columns. Defaults to None.
time_column (str, optional): The name of the column that contains the timestamps. If None is provided, it is assumed that `time` is in the name. Defaults to None.
channel_column (str, optional): The name of the column that specifies the temporally multiplexed acquisition channels. Defaults to 'name'.
channel_names (list[str], optional): The names of the acquisition channel / frequency bands that are acquired. Defaults to None.
Expand All @@ -120,6 +153,9 @@ def from_ibl_dataframe(
# data_columns is a list of str that specifies the names of the column that hold the actual data, like 'RegionXX'
# channel_column is the column that specifies the temporally multiplexed acquisition channels

if validate:
ibl_df = validate_ibl_dataframe(ibl_df)

data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns

# infer name of time column if not provided
Expand Down Expand Up @@ -152,11 +188,19 @@ def from_ibl_dataframe(
def from_ibl_pqt(
signal_pqt_path: str | Path,
locations_pqt_path: Optional[str | Path] = None,
validate=True,
):
# read from a single pqt
# if both are provided, do both
"""reads in photometry data stored in the ibl format as a .pqt file. If provided, uses the metadata stored in the locations.pqt file as well.

Args:
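signal_pqt_path (str | Path): path to the .pqt file that stores the photometry data in the ibl format
locations_pqt_path (Optional[str | Path], optional): path to the locations.pqt file that stores the metadata. Defaults to None.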

ibl_df = pd.read_parquet(signal_pqt_path)
Returns:
_type_: _description_
"""

ibl_df = from_ibl_pqt_to_ibl_df(signal_pqt_path, validate=validate)
if locations_pqt_path is not None:
locations_df = pd.read_parquet(locations_pqt_path)
return from_ibl_dataframes(ibl_df, locations_df)
Expand Down Expand Up @@ -195,7 +239,17 @@ def from_ibl_dataframes(ibl_df: pd.DataFrame, locations_df: pd.DataFrame):
def from_raw_neurophotometrics_file(
path: str | Path, drop_first=True, validate=True, version='new'
) -> dict:
# this one bypasses everything
"""reads in a file generated by the neurophotometrics FP3002 into the analysis ready format

Args:
path (str | Path): path to the file, can be in either .csv or .pqt format
drop_first (bool, optional): Drop the first frame, which has all LEDs on by default. Defaults to True.
validate (bool, optional): If True, validates the file. Defaults to True.
version (str, optional): 'new' or 'old' version of the neurophotometrics file format. Defaults to 'new'.

Returns:
dict: the data in the analysis ready format
"""
ibl_df = from_raw_neurophotometrics_file_to_ibl_df(
path, drop_first=drop_first, validate=validate, version=version
)
Expand Down Expand Up @@ -233,7 +287,19 @@ def read_digital_inputs_csv(path: str | Path, validate=True) -> pd.DataFrame:
"""


def validate_ibl_dataframe(df: pd.DataFrame) -> pd.DataFrame: ...
def validate_ibl_dataframe(ibl_df: pd.DataFrame, data_columns=None) -> pd.DataFrame:
    """validates a dataframe against the schema of the ibl photometry data format

    Args:
        ibl_df (pd.DataFrame): the dataframe to validate
        data_columns (optional): names of the columns that hold the signals. If None, they are inferred with `infer_data_columns`. Defaults to None.

    Returns:
        pd.DataFrame: the result of `pandera.DataFrameSchema.validate` applied to `ibl_df`
    """
    if data_columns is None:
        data_columns = infer_data_columns(ibl_df)

    # every signal column is required to hold float64 values
    signal_columns = {name: pandera.Column(pandera.Float64) for name in data_columns}

    schema = pandera.DataFrameSchema(
        columns=dict(
            times=pandera.Column(pandera.Float64),
            # valid=pandera.Column(pandera.Bool),  # optionally present
            wavelength=pandera.Column(pandera.Float64, nullable=True),
            name=pandera.Column(pandera.String),
            color=pandera.Column(pandera.String),
            **signal_columns,
        )
    )
    return schema.validate(ibl_df)


def validate_neurophotometrics_df(
Expand Down
16 changes: 10 additions & 6 deletions src/iblphotometry_tests/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ def test_from_raw_neurophotometrics_file(self):
pd.testing.assert_frame_equal(dfs_a[key], dfs_b[key])

# from pqt files as they are returned from ONE by .load_dataset()
# def test_from_ibl_pqt(self):
# fpio.from_ibl_pqt(self.paths['photometry_signal_pqt'])
# fpio.from_ibl_pqt(
# self.paths['photometry_signal_pqt'],
# self.paths['photometryROI_locations_pqt'],
# )
def test_from_ibl_pqt(self):
    """loads the ibl .pqt files of each dataset, first the signal alone, then together with the locations file"""
    for dataset in ['carolina', 'alejandro']:
        self.set_paths(dataset)
        signal_path = self.paths['photometry_signal_pqt']
        # signal file only
        fpio.from_ibl_pqt(signal_path)
        # signal file plus the locations metadata
        fpio.from_ibl_pqt(signal_path, self.paths['photometryROI_locations_pqt'])
Loading