diff --git a/src/iblphotometry/io (Copy).py b/src/iblphotometry/io (Copy).py
deleted file mode 100644
index b61d4a6..0000000
--- a/src/iblphotometry/io (Copy).py
+++ /dev/null
@@ -1,274 +0,0 @@
-import numpy as np
-import pandas as pd
-from pathlib import Path
-import warnings
-import pandera
-from typing import Optional
-
-from iblphotometry.neurophotometrics import (
-    LIGHT_SOURCE_MAP,
-    LED_STATES,
-)
-
-
-def from_raw_neurophotometrics_file_to_raw_df(
-    path: str | Path,
-    validate=True,
-) -> pd.DataFrame:
-    path = Path(path) if isinstance(path, str) else path
-    match path.suffix:
-        case '.csv':
-            raw_df = pd.read_csv(path)
-        case '.pqt':
-            raw_df = pd.read_parquet(path)
-
-    if validate:
-        raw_df = validate_neurophotometrics_df(raw_df)
-
-    return raw_df
-
-
-def from_raw_neurophotometrics_df_to_ibl_df(
-    raw_df: pd.DataFrame, rois=None, drop_first=True
-) -> pd.DataFrame:
-    if rois is None:
-        rois = infer_data_columns(raw_df)
-
-    ibl_df = raw_df.filter(items=rois, axis=1).sort_index(axis=1)
-    timestamp_name = (
-        'SystemTimestamp' if 'SystemTimestamp' in raw_df.columns else 'Timestamp'
-    )
-    ibl_df['times'] = raw_df[timestamp_name]
-    ibl_df['wavelength'] = np.nan
-    ibl_df['name'] = ''
-    ibl_df['color'] = ''
-
-    # TODO the names column in channel_meta_map should actually be user defined (experiment description file?)
-    channel_meta_map = pd.DataFrame(LIGHT_SOURCE_MAP)
-    led_states = pd.DataFrame(LED_STATES).set_index('Condition')
-    states = raw_df['LedState']
-
-    for state in states.unique():
-        ir, ic = np.where(led_states == state)
-        # if not present, multiple LEDs are active
-        if ic.size == 0:
-            # find row
-            ir = np.argmax(led_states['No LED ON'] > state) - 1
-            # find active combo
-            possible_led_combos = [(1, 2), (1, 3), (2, 3), (1, 2, 3)]
-            for combo in possible_led_combos:  # drop enumerate
-                if state == sum([led_states.iloc[ir, c] for c in combo]):
-                    name = '+'.join([channel_meta_map['name'][c] for c in combo])
-                    color = '+'.join([channel_meta_map['color'][c] for c in combo])
-                    wavelength = np.nan
-                    ibl_df.loc[states == state, ['name', 'color', 'wavelength']] = (
-                        name,
-                        color,
-                        wavelength,
-                    )
-        else:
-            for cn in ['name', 'color', 'wavelength']:
-                ibl_df.loc[states == state, cn] = channel_meta_map.iloc[ic[0]][cn]
-
-    # drop first frame
-    if drop_first:
-        ibl_df = ibl_df.iloc[1:].reset_index()
-
-    return ibl_df
-
-
-def from_raw_neurophotometrics_file_to_ibl_df(
-    path: str | Path,
-    drop_first=True,
-    validate=True,
-) -> pd.DataFrame:
-    raw_df = from_raw_neurophotometrics_file_to_raw_df(path, validate=validate)
-    ibl_df = from_raw_neurophotometrics_df_to_ibl_df(raw_df, drop_first=drop_first)
-
-    return ibl_df
-
-
-def from_ibl_pqt_to_ibl_df(path: str | Path, validate=False):
-    if validate is True:
-        # TODO
-        raise NotImplementedError
-    return pd.read_parquet(path)
-
-
-def from_ibl_dataframe(
-    ibl_df: pd.DataFrame,
-    data_columns: list[str] | None = None,
-    time_column: str | None = None,
-    channel_column: str = 'name',
-    channel_names: list[str] | None = None,
-    rename: dict | None = None,
-) -> dict:
-    """main function to convert to analysis ready format
-
-
-    Args:
-        ibl_df (pd.DataFrame): the dataframe, as stored in the photometry.signal.pqt
-        data_columns (list[str], optional): The names of the columns in the dataframe that contain the signals of different fibers. By default, they are named RegionXX. If None is provided, All columns that start with `Region` are treated as data columns. Defaults to None.
-        time_column (str, optional): The name of the column that contains the timestamps. If None is provided, it is assumed that `time` is in the name. Defaults to None.
-        channel_column (str, optional): The name of the column that contains. Defaults to 'name'.
-        channel_names (list[str], optional): The names of the acquisition channel / frequency bands that are acquired. Defaults to None.
-        rename (dict, optional): a renaming map that maps the names of the columns to brain areas. Example: {'RegionXX':'DMS'}. Defaults to None.
-
-    Returns:
-        dict: A dict with the keys being the names of the acquisition channels, the values being nap.TsdFrames with the columns containing the data of the different fibers
-    """
-    # from a raw dataframe as it is stored in ONE (signal.pqt)
-    # data_columns is a list of str that specifies the names of the column that hold the actual data, like 'RegionXX'
-    # channel_column is the column that specifies the temporally multiplexed acquisition channels
-
-    data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns
-
-    # infer name of time column if not provided
-    if time_column is None:
-        time_columns = [col for col in ibl_df.columns if 'time' in col.lower()]
-        assert len(time_columns) == 1
-        time_column = time_columns[0]
-
-    # infer channel names if they are not explicitly provided
-    if channel_names is None:
-        channel_names = ibl_df[channel_column].unique()
-
-    # drop empty acquisition channels
-    to_drop = ['None', '']
-    channel_names = [ch for ch in channel_names if ch not in to_drop]
-
-    dfs = {}
-    for channel in channel_names:
-        # get the data for the band
-        df = ibl_df.groupby(channel_column).get_group(channel)
-        # if rename dict is passed, rename Region0X to the corresponding brain region
-        if rename is not None:
-            df = df.rename(columns=rename)
-            data_columns = rename.values()
-        dfs[channel] = df.set_index(time_column)[data_columns]
-
-    return dfs
-
-
-def from_ibl_pqt(
-    signal_pqt_path: str | Path,
-    locations_pqt_path: Optional[str | Path] = None,
-):
-    # read from a single pqt
-    # if both are provided, do both
-
-    ibl_df = pd.read_parquet(signal_pqt_path)
-    if locations_pqt_path is not None:
-        locations_df = pd.read_parquet(locations_pqt_path)
-        return from_ibl_dataframes(ibl_df, locations_df)
-    else:
-        warnings.warn(
-            'loading a photometry.signal.pqt file without its corresponding photometryROI.locations.pqt'
-        )
-        data_columns = None
-        rename = None
-
-    read_config = dict(
-        data_columns=data_columns,
-        time_column='times',
-        channel_column='name',
-        rename=rename,
-    )
-
-    return from_ibl_dataframe(ibl_df, **read_config)
-
-
-def from_ibl_dataframes(ibl_df: pd.DataFrame, locations_df: pd.DataFrame):
-    # if locations are present
-    data_columns = (list(locations_df.index),)
-    rename = locations_df['brain_region'].to_dict()
-
-    read_config = dict(
-        data_columns=data_columns,
-        time_column='times',
-        channel_column='name',
-        rename=rename,
-    )
-
-    return from_ibl_dataframe(ibl_df, **read_config)
-
-
-def from_raw_neurophotometrics_file(
-    path: str | Path,
-    drop_first=True,
-    validate=True,
-) -> dict:
-    # this one bypasses everything
-    ibl_df = from_raw_neurophotometrics_file_to_ibl_df(
-        path, drop_first=drop_first, validate=validate
-    )
-    # data_columns = infer_data_columns(ibl_df) if data_columns is None else data_columns
-    read_config = dict(
-        # data_columns=data_columns,
-        time_column='times',
-        channel_column='name',
-    )
-    return from_ibl_dataframe(ibl_df, **read_config)
-
-def read_digital_inputs_csv(path: str | Path,
-                            validate=True) -> pd.DataFrame:
-
-    df_digital_inputs = pd.read_csv(path, header=None)
-    df_digital_inputs.columns = ['ChannelName', 'Channel', 'AlwaysTrue', 'SystemTimestamp', 'ComputerTimestamp']
-    if validate:
-        df_digital_inputs = validate_neurophotometrics_digital_inputs(df_digital_inputs)
-    return df_digital_inputs
-
-"""
-## ## ### ## #### ######## ### ######## #### ####### ## ##
-## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ##
-## ## ## ## ## ## ## ## ## ## ## ## ## ## #### ##
-## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
- ## ## ######### ## ## ## ## ######### ## ## ## ## ## ####
- ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ###
- ### ## ## ######## #### ######## ## ## ## #### ####### ## ##
-"""
-
-
-def validate_ibl_dataframe(df: pd.DataFrame) -> pd.DataFrame: ...
-
-
-def validate_neurophotometrics_df(
-    df: pd.DataFrame,
-    data_columns=None,
-) -> pd.DataFrame:
-    data_columns = infer_data_columns(df) if data_columns is None else data_columns
-
-    schema_raw_data = pandera.DataFrameSchema(
-        columns=dict(
-            FrameCounter=pandera.Column(pandera.Int64),
-            SystemTimestamp=pandera.Column(pandera.Float64),
-            LedState=pandera.Column(pandera.Int16, coerce=True),
-            ComputerTimestamp=pandera.Column(pandera.Float64),
-            **{k: pandera.Column(pandera.Float64) for k in data_columns},
-        )
-    )
-
-    return schema_raw_data.validate(df)
-
-
-def validate_neurophotometrics_digital_inputs(df: pd.DataFrame) -> pd.DataFrame:
-    schema_digital_inputs = pandera.DataFrameSchema(
-        columns=dict(
-            ChannelName=pandera.Column(str, coerce=True),
-            Channel=pandera.Column(pandera.Int8, coerce=True),
-            AlwaysTrue=pandera.Column(bool, coerce=True),
-            SystemTimestamp=pandera.Column(pandera.Float64),
-            ComputerTimestamp=pandera.Column(pandera.Float64),
-        )
-    )
-    return schema_digital_inputs.validate(df)
-
-
-def infer_data_columns(df: pd.DataFrame) -> list[str]:
-    # this hacky parser currently deals with the inconsistency between carolinas and alejandros extraction
-    # https://github.com/int-brain-lab/ibl-photometry/issues/35
-    data_columns = [
-        col for col in df.columns if col.startswith('Region') or col.startswith('G')
-    ]
-    return data_columns
diff --git a/src/iblphotometry/io.py b/src/iblphotometry/io.py
index b61d4a6..1ec8e9b 100644
--- a/src/iblphotometry/io.py
+++ b/src/iblphotometry/io.py
@@ -210,15 +210,21 @@ def from_raw_neurophotometrics_file(
     )
     return from_ibl_dataframe(ibl_df, **read_config)
 
-def read_digital_inputs_csv(path: str | Path,
-                            validate=True) -> pd.DataFrame:
-
+
+def read_digital_inputs_csv(path: str | Path, validate=True) -> pd.DataFrame:
     df_digital_inputs = pd.read_csv(path, header=None)
-    df_digital_inputs.columns = ['ChannelName', 'Channel', 'AlwaysTrue', 'SystemTimestamp', 'ComputerTimestamp']
+    df_digital_inputs.columns = [
+        'ChannelName',
+        'Channel',
+        'AlwaysTrue',
+        'SystemTimestamp',
+        'ComputerTimestamp',
+    ]
     if validate:
         df_digital_inputs = validate_neurophotometrics_digital_inputs(df_digital_inputs)
     return df_digital_inputs
 
+
 """
 ## ## ### ## #### ######## ### ######## #### ####### ## ##
 ## ## ## ## ## ## ## ## ## ## ## ## ## ## ### ##
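
A minimal usage sketch of the read_digital_inputs_csv helper touched by this diff, assuming the package is importable as iblphotometry and a digital-inputs CSV exists at a hypothetical path (both assumptions, not taken from the diff):

from pathlib import Path

from iblphotometry.io import read_digital_inputs_csv

# hypothetical location of a Neurophotometrics digital-inputs recording
csv_path = Path('raw_photometry_data') / 'digital_inputs.csv'

# returns a DataFrame with the columns ChannelName, Channel, AlwaysTrue,
# SystemTimestamp and ComputerTimestamp; with validate=True the frame is
# also checked against the pandera schema in
# validate_neurophotometrics_digital_inputs
df_digital_inputs = read_digital_inputs_csv(csv_path, validate=True)
print(df_digital_inputs.dtypes)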