Skip to content

Commit

Permalink
Format doctests for mkdocs
Browse files Browse the repository at this point in the history
  • Loading branch information
bjhardcastle committed Dec 13, 2023
1 parent 431ac44 commit dabfc62
Show file tree
Hide file tree
Showing 10 changed files with 123 additions and 128 deletions.
28 changes: 14 additions & 14 deletions .archive/npc_lims/metadata/record_dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,13 +224,13 @@ def execute(self, query: str) -> None:
class TestDBHub(SqliteDBHub):
"""Test database on dbhub.io.
>>> db = TestDBHub()
>>> db.execute("DROP TABLE IF EXISTS test;")
>>> db.create()
>>> db.insert("test", {'id': 1, 'name': 'one'}, {'id': 2, 'name': 'two'})
>>> db.query("SELECT * FROM test;")
>>> db = TestDBHub()
>>> db.execute("DROP TABLE IF EXISTS test;")
>>> db.create()
>>> db.insert("test", {'id': 1, 'name': 'one'}, {'id': 2, 'name': 'two'})
>>> db.query("SELECT * FROM test;")
({'id': 1, 'name': 'one'}, {'id': 2, 'name': 'two'})
>>> _ = db.execute("DROP TABLE test;")
>>> _ = db.execute("DROP TABLE test;")
"""

db_name = "test.db"
Expand All @@ -242,15 +242,15 @@ class TestDBHub(SqliteDBHub):
class TestLocalDB(SqliteLocalDB):
"""Test database on local file.
>>> db = TestLocalDB()
>>> db.execute("DROP TABLE IF EXISTS test;")
>>> db.create()
>>> db.insert("test", {'id': 1, 'name': 'one'}, {'id': 2, 'name': 'two'})
>>> db.query("SELECT * FROM test;")
>>> db = TestLocalDB()
>>> db.execute("DROP TABLE IF EXISTS test;")
>>> db.create()
>>> db.insert("test", {'id': 1, 'name': 'one'}, {'id': 2, 'name': 'two'})
>>> db.query("SELECT * FROM test;")
({'id': 1, 'name': 'one'}, {'id': 2, 'name': 'two'})
>>> _ = db.execute("DROP TABLE test;")
>>> db.close()
>>> db.path.unlink()
>>> _ = db.execute("DROP TABLE test;")
>>> db.close()
>>> db.path.unlink()
"""

db_name = "test.sqlite"
Expand Down
14 changes: 7 additions & 7 deletions .archive/npc_lims/metadata/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,15 +117,15 @@ class Session(RecordWithNWB):
@dataclasses.dataclass
class Epoch(RecordWithNWB):
"""
>>> from npc_lims import NWBSqliteDBHub as DB
>>> from npc_lims import NWBSqliteDBHub as DB
>>> epoch = Epoch('626791_2022-08-15', '11:23:36', '12:23:54', ['DynamicRouting1'])
>>> DB().add_records(epoch)
>>> epoch = Epoch('626791_2022-08-15', '11:23:36', '12:23:54', ['DynamicRouting1'])
>>> DB().add_records(epoch)
>>> all_epochs = DB().get_records(Epoch)
>>> assert epoch in all_epochs, f"{epoch=} not in {all_epochs=}"
>>> session_epochs = DB().get_records(Epoch, session_id='626791_2022-08-15')
>>> session_epochs[0].tags
>>> all_epochs = DB().get_records(Epoch)
>>> assert epoch in all_epochs, f"{epoch=} not in {all_epochs=}"
>>> session_epochs = DB().get_records(Epoch, session_id='626791_2022-08-15')
>>> session_epochs[0].tags
['DynamicRouting1']
"""

Expand Down
22 changes: 11 additions & 11 deletions .archive/npc_lims/state/redis_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,22 @@ class State(collections.abc.MutableMapping):
- dict interface provides `keys`, `get`, `setdefault`, `pop`, etc.
- accepted value types are str, int, float, bool, None
>>> test_id = 0
>>> state = State(test_id)
>>> state['test'] = 1.0
>>> state['test']
>>> test_id = 0
>>> state = State(test_id)
>>> state['test'] = 1.0
>>> state['test']
1.0
>>> state['test'] = 'test'
>>> state['test']
>>> state['test'] = 'test'
>>> state['test']
'test'
>>> all('test' in _ for _ in (state, state.keys(), state.values()))
>>> all('test' in _ for _ in (state, state.keys(), state.values()))
True
>>> state.setdefault('test', True)
>>> state.setdefault('test', True)
'test'
>>> state.pop('test')
>>> state.pop('test')
'test'
>>> del state['test']
>>> state.get('test') is None
>>> del state['test']
>>> state.get('test') is None
True
"""

Expand Down
55 changes: 25 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,10 @@ issues](https://img.shields.io/github/issues/alleninstitute/npc_lims?logo=github

## quickstart

- make a new Python >=3.9 virtual environment with conda or venv (lighter option, since this package does not require pandas, numpy etc.):
- make a new Python >=3.9 virtual environment with conda or venv (lighter option, since this package does not require pandas, numpy etc.):
```bash
python -m venv .venv
```

- activate the virtual environment:
- Windows
```cmd
Expand All @@ -31,56 +30,52 @@ issues](https://img.shields.io/github/issues/alleninstitute/npc_lims?logo=github
```bash
python -m pip install npc_lims
```

- setup credentials
- required environment variables:
- AWS S3
- `AWS_DEFAULT_REGION`
- `AWS_ACCESS_KEY_ID`
- `AWS_SECRET_ACCESS_KEY`
- to find and read files on S3
- must have read access on relevant aind buckets
- can be in a standard `~/.aws` location, as used by AWS CLI or boto3
- CodeOcean API
- `CODE_OCEAN_API_TOKEN`
- `CODE_OCEAN_DOMAIN`
- `AWS_DEFAULT_REGION`
- `AWS_ACCESS_KEY_ID`
- `AWS_SECRET_ACCESS_KEY`
- to find and read files on S3
- must have read access on relevant aind buckets
- can be in a standard `~/.aws` location, as used by AWS CLI or boto3
- CodeOcean API
- `CODE_OCEAN_API_TOKEN`
- `CODE_OCEAN_DOMAIN`
- to find processed data in "data assets" via the Codeocean API
- generated in CodeOcean:
- right click on `Account` (bottom left, person icon)
- click `User Secrets` - these are secrets that can be made available as environment variables in CodeOcean capsules
- go to `Access Tokens` and click `Generate new token` - this is for programmatically querying CodeOcean's databases
- in `Token Name` enter `Codeocean API (read)` and check `read` on capsules and datasets
- a token will be generated: click copy (storing it in a password manager, if you use one)
- head back to `User Secrets` where we'll paste it into a new secret via `Add secret > API credentials`
- in `description` enter `Codeocean API (read)`
- in `API key` enter `CODE_OCEAN_API_KEY`
- in `API secret` paste the copied secret from before (should start with `cop_`...)
`CODE_OCEAN_DOMAIN` is the codeocean https address, up to and including `.org`
- head back to `User Secrets` where we'll paste it into a new secret via `Add secret > API credentials` - in `description` enter `Codeocean API (read)` - in `API key` enter `CODE_OCEAN_API_KEY` - in `API secret` paste the copied secret from before (should start with `cop_`...)
`CODE_OCEAN_DOMAIN` is the codeocean https address, up to and including `.org`
- environment variables can also be specified in a file named `.env` in the current working directory
- example: https://www.dotenv.org/docs/security/env.html
- be very careful that this file does not get pushed to public locations, e.g. github
- if using git, add it to a `.gitignore` file in your project's root directory:
- be very careful that this file does not get pushed to public locations, e.g. github
- if using git, add it to a `.gitignore` file in your project's root directory:
```gitignore
.env*
```
- now in Python we can find sessions that are available to work with:
```python
>>> import npc_lims;
>>> import npc_lims;
# get a sequence of `SessionInfo` dataclass instances, one per session:
>>> tracked_sessions: tuple[npc_lims.SessionInfo, ...] = npc_lims.get_session_info()
>>> tracked_sessions: tuple[npc_lims.SessionInfo, ...] = npc_lims.get_session_info()
# each `SessionInfo` instance has minimal metadata about its session:
>>> tracked_sessions[0] # doctest: +SKIP
>>> tracked_sessions[0] # doctest: +SKIP
npc_lims.SessionInfo(id='626791_2022-08-15', subject=626791, date='2022-08-15', idx=0, project='DRPilotSession', is_ephys=True, is_sync=True, allen_path=PosixUPath('//allen/programs/mindscope/workgroups/dynamicrouting/PilotEphys/Task 2 pilot/DRpilot_626791_20220815'))
>>> tracked_sessions[0].is_ephys # doctest: +SKIP
>>> tracked_sessions[0].is_ephys # doctest: +SKIP
True
# currently, we're only tracking behavior and ephys sessions that use variants of https://github.com/samgale/DynamicRoutingTask/blob/main/TaskControl.py:
>>> all(s.date.year >= 2022 for s in tracked_sessions)
# currently, we're only tracking behavior and ephys sessions that use variants of https://github.com/samgale/DynamicRoutingTask/blob/main/TaskControl.py:
>>> all(s.date.year >= 2022 for s in tracked_sessions)
True
```

- "tracked sessions" are discovered via 3 routes:
Expand Down
18 changes: 9 additions & 9 deletions src/npc_lims/metadata/codeocean.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,9 @@ def get_data_asset(asset: str | uuid.UUID | DataAssetAPI) -> DataAssetAPI:

def is_raw_data_asset(asset: str | DataAssetAPI) -> bool:
"""
>>> is_raw_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
>>> is_raw_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
True
>>> is_raw_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
>>> is_raw_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
False
"""
asset = get_data_asset(asset)
Expand All @@ -218,9 +218,9 @@ def is_raw_data_asset(asset: str | DataAssetAPI) -> bool:

def is_sorted_data_asset(asset: str | DataAssetAPI) -> bool:
"""
>>> is_sorted_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
>>> is_sorted_data_asset('173e2fdc-0ca3-4a4e-9886-b74207a91a9a')
True
>>> is_sorted_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
>>> is_sorted_data_asset('83636983-f80d-42d6-a075-09b60c6abd5e')
False
"""
asset = get_data_asset(asset)
Expand All @@ -233,7 +233,7 @@ def get_session_raw_data_asset(
session: str | npc_session.SessionRecord,
) -> DataAssetAPI:
"""
>>> get_session_raw_data_asset('668759_20230711')["id"]
>>> get_session_raw_data_asset('668759_20230711')["id"]
'83636983-f80d-42d6-a075-09b60c6abd5e'
"""
session = npc_session.SessionRecord(session)
Expand All @@ -251,10 +251,10 @@ def get_surface_channel_root(session: str | npc_session.SessionRecord) -> upath.
"""Reconstruct path to surface channel data in bucket (e.g. on s3) using data-asset
info from Code Ocean.
>>> get_surface_channel_root('660023_20230808')
>>> get_surface_channel_root('660023_20230808')
S3Path('s3://aind-ephys-data/ecephys_660023_2023-08-08_15-11-14')
>>> assert get_surface_channel_root('660023_20230808') != get_raw_data_root('660023_20230808')
>>> get_surface_channel_root('649943_20230216')
>>> assert get_surface_channel_root('660023_20230808') != get_raw_data_root('660023_20230808')
>>> get_surface_channel_root('649943_20230216')
Traceback (most recent call last):
...
FileNotFoundError: 649943_20230216 has no surface channel data assets
Expand All @@ -277,7 +277,7 @@ def get_raw_data_root(session: str | npc_session.SessionRecord) -> upath.UPath:
"""Reconstruct path to raw data in bucket (e.g. on s3) using data-asset
info from Code Ocean.
>>> get_raw_data_root('668759_20230711')
>>> get_raw_data_root('668759_20230711')
S3Path('s3://aind-ephys-data/ecephys_668759_2023-07-11_13-07-32')
"""
session = npc_session.SessionRecord(session)
Expand Down
4 changes: 2 additions & 2 deletions src/npc_lims/metadata/spreadsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def get_training_db(nsb: bool = False) -> sqlite3.Connection:
"""
Download db to tempdir, open connection, return connection.
>>> assert get_training_db()
>>> assert get_training_db()
"""
db_path = upath.UPath(tempfile.mkstemp(suffix=".db")[1])
s3_path = next(
Expand Down Expand Up @@ -56,7 +56,7 @@ def update_training_dbs() -> None:
Read spreadsheets from the data repo and write them to corresponding
databases, currently sqlite files in the same directory.
>>> update_training_dbs()
>>> update_training_dbs()
"""
for spreadsheet, sqlite in zip(
get_training_spreadsheet_paths(), get_training_sqlite_paths()
Expand Down
10 changes: 5 additions & 5 deletions src/npc_lims/paths/codeocean.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def get_raw_data_paths_from_s3(
"""All top-level files and folders from the `ephys` & `behavior`
subdirectories in a session's raw data folder on s3.
>>> files = get_raw_data_paths_from_s3 ('668759_20230711')
>>> assert len(files) > 0
>>> files = get_raw_data_paths_from_s3 ('668759_20230711')
>>> assert len(files) > 0
"""
raw_data_root = metadata.get_raw_data_root(session)
directories: Iterator = (
Expand Down Expand Up @@ -52,9 +52,9 @@ def get_hdf5_stim_files_from_s3(
"""All the stim files for a session, from the synced
`DynamicRoutingTask/Data` folder on s3.
>>> files = get_hdf5_stim_files_from_s3('668759_20230711')
>>> assert len(files) > 0
>>> files[0].name, files[0].time
>>> files = get_hdf5_stim_files_from_s3('668759_20230711')
>>> assert len(files) > 0
>>> files[0].name, files[0].time
('DynamicRouting1', '13:25:00')
"""
session = npc_session.SessionRecord(session)
Expand Down
26 changes: 13 additions & 13 deletions src/npc_lims/paths/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ def get_raw_data_paths_from_s3(
"""All top-level files and folders from the `ephys` & `behavior`
subdirectories in a session's raw data folder on s3.
>>> files = get_raw_data_paths_from_s3 ('668759_20230711')
>>> assert len(files) > 0
>>> files = get_raw_data_paths_from_s3 ('668759_20230711')
>>> assert len(files) > 0
"""
raw_data_root = codeocean.get_raw_data_root(session)
directories: Iterator[upath.UPath] = (
Expand All @@ -75,8 +75,8 @@ def get_sorted_data_paths_from_s3(
) -> tuple[upath.UPath, ...]:
"""
Gets the top level files/folders for the sorted data
>>> sorted_data_s3_paths = get_sorted_data_paths_from_s3('668759_20230711')
>>> assert len(sorted_data_s3_paths) > 0
>>> sorted_data_s3_paths = get_sorted_data_paths_from_s3('668759_20230711')
>>> assert len(sorted_data_s3_paths) > 0
"""
if sorted_data_asset_id is not None:
sorted_data_asset = codeocean.get_data_asset(sorted_data_asset_id)
Expand Down Expand Up @@ -108,7 +108,7 @@ def get_settings_xml_path_from_s3(
@functools.cache
def get_h5_sync_from_s3(session: str | npc_session.SessionRecord) -> upath.UPath:
"""
>>> get_h5_sync_from_s3('662892_20230821')
>>> get_h5_sync_from_s3('662892_20230821')
S3Path('s3://aind-ephys-data/ecephys_662892_2023-08-21_12-43-45/behavior/20230821T124345.h5')
"""
raw_data_paths_s3 = get_raw_data_paths_from_s3(session)
Expand Down Expand Up @@ -139,7 +139,7 @@ def get_spike_sorting_device_path_from_s3(
session: str | npc_session.SessionRecord, device_name: str
) -> upath.UPath:
"""
>>> get_spike_sorting_device_path_from_s3('662892_20230821', 'ProbeA')
>>> get_spike_sorting_device_path_from_s3('662892_20230821', 'ProbeA')
S3Path('s3://codeocean-s3datasetsbucket-1u41qdg42ur9/d527db85-39b7-4c4f-a465-9ca499b0ca47/spikesorted/experiment1_Record Node 102#Neuropix-PXI-100.ProbeA-AP_recording1/sorting_cached.npz')
"""
spike_sorted_paths = get_spike_sorted_paths_from_s3(session)
Expand Down Expand Up @@ -273,9 +273,9 @@ def get_tissuecyte_annotation_files_from_s3(
"""For each probe inserted, get a csv file containing CCF coordinates for each
electrode (channel) on the probe.
>>> electrode_files = get_tissuecyte_annotation_files_from_s3('626791_2022-08-16')
>>> assert len(electrode_files) > 0
>>> electrode_files[0].name
>>> electrode_files = get_tissuecyte_annotation_files_from_s3('626791_2022-08-16')
>>> assert len(electrode_files) > 0
>>> electrode_files[0].name
'Probe_A2_channels_626791_warped_processed.csv'
"""
session = npc_session.SessionRecord(session)
Expand Down Expand Up @@ -319,9 +319,9 @@ def get_hdf5_stim_files_from_s3(
- filters out files that are obviously wrong
>>> files = get_hdf5_stim_files_from_s3('668759_20230711')
>>> assert len(files) > 0
>>> files[0].name, files[0].time
>>> files = get_hdf5_stim_files_from_s3('668759_20230711')
>>> assert len(files) > 0
>>> files[0].name, files[0].time
('DynamicRouting1', '13:25:00')
"""
session = npc_session.SessionRecord(session)
Expand Down Expand Up @@ -361,7 +361,7 @@ def get_nwb_file_from_s3(
session: str | npc_session.SessionRecord,
) -> upath.UPath:
"""
>>> get_nwb_file_from_s3('636766_20230125')
>>> get_nwb_file_from_s3('636766_20230125')
S3Path('s3://aind-scratch-data/ben.hardcastle/nwb/nwb/DRpilot_636766_20230125.nwb')
"""
session = npc_session.SessionRecord(session)
Expand Down
Loading

0 comments on commit dabfc62

Please sign in to comment.