Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support 'single-record-per-directory' format #82

Merged
merged 1 commit into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datalad_tabby/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from datalad_tabby.tests.fixtures import (
# provides the tabby "demorecord" that is shipped with the sources
tabby_tsv_record,
# same again, but in a single-record-per-dir layout
tabby_tsv_singledir_record,
# elementary tabby record comprising the key TSV building blocks
tabby_record_basic_components,
# no-LD elementary record with overrides
Expand Down
19 changes: 14 additions & 5 deletions datalad_tabby/io/load_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,19 @@ def _build_overrides(src: Path, obj: Dict):


def _get_corresponding_sheet_fpath(fpath: Path, sheet_name: str) -> Path:
    """Return the path of the TSV sheet ``sheet_name`` located next to ``fpath``.

    Parameters
    ----------
    fpath: Path
      Path of a sheet file belonging to the record.
    sheet_name: str
      Name of the sheet to locate, without prefix and without extension.

    Returns
    -------
    Path
      ``<prefix>_<sheet_name>.tsv`` in the directory of ``fpath`` when the
      record prefix is non-empty, otherwise ``<sheet_name>.tsv``.
    """
    prefix = _get_tabby_prefix_from_sheet_fpath(fpath)
    # with an empty prefix there must be no leading '_' in the file name
    fname = f'{prefix}_{sheet_name}.tsv' if prefix else f'{sheet_name}.tsv'
    return fpath.parent / fname


def _get_record_context_fpath(fpath: Path) -> Path:
    """Return the path of the record-level JSON-LD context file for ``fpath``.

    Parameters
    ----------
    fpath: Path
      Path of a sheet file belonging to the record.

    Returns
    -------
    Path
      ``<prefix>.ctx.jsonld`` in the directory of ``fpath`` when the record
      prefix is non-empty, otherwise ``ctx.jsonld``.
    """
    prefix = _get_tabby_prefix_from_sheet_fpath(fpath)
    # with an empty prefix the file name must not start with a '.'
    fname = f'{prefix}.ctx.jsonld' if prefix else 'ctx.jsonld'
    return fpath.parent / fname


def _get_corresponding_context_fpath(fpath: Path) -> Path:
Expand All @@ -85,8 +91,11 @@ def _get_corresponding_override_fpath(fpath: Path) -> Path:

def _get_tabby_prefix_from_sheet_fpath(fpath: Path) -> str:
stem = fpath.stem
# stem up to, but not including, the last '_'
return stem[:(-1) * stem[::-1].index('_') - 1]
if '_' not in stem:
return ''
else:
# stem up to, but not including, the last '_'
return stem[:(-1) * stem[::-1].index('_') - 1]


def _get_index_after_last_nonempty(val: List) -> int:
Expand Down
6 changes: 6 additions & 0 deletions datalad_tabby/io/tests/test_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,9 @@ def test_load_almost_tabby_import(tmp_path):

rec = load_tabby(src)
assert rec['dummy'] == '@tabby-murks'


def test_load_singldir_format(tabby_tsv_record, tabby_tsv_singledir_record):
    """Both layouts of the demo record must load to an equal result."""
    prefixed_root = tabby_tsv_record['root_sheet']
    singledir_root = tabby_tsv_singledir_record['root_sheet']
    # load the prefixed layout first, then the single-directory layout
    prefixed_rec = load_tabby(prefixed_root)
    singledir_rec = load_tabby(singledir_root)
    assert prefixed_rec == singledir_rec
31 changes: 31 additions & 0 deletions datalad_tabby/tests/fixtures.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from pathlib import Path
import pytest
from shutil import copyfile

from datalad_next.tests.utils import md5sum

Expand All @@ -23,6 +24,36 @@ def tabby_tsv_record():
)


@pytest.fixture(autouse=False, scope="session")
def tabby_tsv_singledir_record(tmp_path_factory):
    """Provide the demo record in a single-record-per-directory layout.

    Every ``tabbydemo*`` file of the ``demorecord`` test data directory is
    copied into a fresh temporary directory named ``tabbydemo``. Of each file
    name only the part after the last ``'_'`` is kept; the record context
    file ``tabbydemo.ctx.jsonld`` becomes ``ctx.jsonld``.

    Yields a dict with the keys ``root_sheet`` (path of ``dataset.tsv``),
    ``sheets`` (paths of all copied ``.tsv`` files), and ``md5`` (mapping of
    sheet file name to its MD5 checksum).
    """
    demo_dir = Path(dttests.__file__).parent / 'data' / 'demorecord'
    sources = list(demo_dir.glob('tabbydemo*'))
    record_dir = tmp_path_factory.mktemp("demorecord_single") / 'tabbydemo'
    record_dir.mkdir()

    def _target_name(name):
        # the context file is special-cased, all other names are reduced
        # to whatever follows the last '_'
        if name == 'tabbydemo.ctx.jsonld':
            return 'ctx.jsonld'
        return name.split('_')[-1].lstrip('.')

    targets = [record_dir / _target_name(src.name) for src in sources]
    # real copies instead of symlinks, windows does not play with symlinks
    # in general
    for src, dst in zip(sources, targets):
        copyfile(src, dst)

    root_sheet = record_dir / 'dataset.tsv'
    assert root_sheet in targets
    assert root_sheet.exists()

    tsv_sheets = [path for path in targets if path.name.endswith('.tsv')]
    yield dict(
        root_sheet=root_sheet,
        sheets=tsv_sheets,
        md5={sheet.name: md5sum(sheet) for sheet in tsv_sheets},
    )


@pytest.fixture(scope="session")
def tabby_record_basic_components(tmp_path_factory):
rdir = tmp_path_factory.mktemp("rec")
Expand Down