Fix external link within file on export (#847)
rly authored Apr 13, 2023
1 parent f38e0b1 commit 963aea7
Showing 4 changed files with 108 additions and 10 deletions.
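Before the per-file diff, a hedged sketch of the round trip this commit concerns ("source.h5" and "export.h5" are placeholder file names; the new tests below exercise the same flow with the Foo test fixtures):

```python
from hdmf.backends.hdf5 import HDF5IO
from hdmf.common import get_manager  # build manager for the hdmf-common namespace

# Read a file that contains an internal (soft) link, then export it.
# Before this fix, the exported copy could contain an external link pointing
# back at source.h5 instead of a soft link within export.h5; the new tests
# below trigger the case by calling set_modified() on the linked container.
with HDF5IO("source.h5", manager=get_manager(), mode="r") as read_io:
    container = read_io.read()
    with HDF5IO("export.h5", mode="w") as export_io:
        export_io.export(src_io=read_io, container=container)
```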
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -1,8 +1,10 @@
# HDMF Changelog

## HDMF 3.5.5 (April 12, 2023)
## HDMF 3.5.5 (April 13, 2023)

### Bug fixes
- Fixed error during export where an external link to the same file was created in some situations.
@rly [#847](https://github.com/hdmf-dev/hdmf/pull/847)
- Remove unused, deprecated `codecov` package from dev installation requirements. @rly
[#849](https://github.com/hdmf-dev/hdmf/pull/849)
- Fix export with `'link_data': False` not copying datasets in some situations. @rly
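The #847 entry above is terse, so here is a minimal, purely illustrative sketch (hypothetical file name, plain stdlib) of the comparison that could go wrong before the fix: a builder that recorded its file by a relative path never compared equal to the absolute path used during export, so the writer concluded the link target lived in a different file.

```python
import os

recorded_source = "data.h5"                 # hypothetical: source stored on a builder (relative)
export_source = os.path.abspath("data.h5")  # hypothetical: path passed through export()

print(recorded_source == export_source)                   # False -> looked like another file
print(os.path.abspath(recorded_source) == export_source)  # True  -> same file once normalized
```

The changes below normalize both sides with `os.path.abspath` so this comparison is reliable.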
14 changes: 7 additions & 7 deletions src/hdmf/backends/hdf5/h5tools.py
@@ -414,7 +414,7 @@ def export(self, **kwargs):
raise UnsupportedOperation("Cannot export from non-HDF5 backend %s to HDF5 with write argument "
"link_data=True." % src_io.__class__.__name__)

write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder
write_args['export_source'] = os.path.abspath(src_io.source) if src_io.source is not None else None
ckwargs = kwargs.copy()
ckwargs['write_args'] = write_args
if not write_args.get('link_data', True):
@@ -580,7 +580,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
if sub_h5obj.name in ignore:
continue
link_type = h5obj.get(k, getlink=True)
if isinstance(link_type, SoftLink) or isinstance(link_type, ExternalLink):
if isinstance(link_type, (SoftLink, ExternalLink)):
# Reading links might be better suited in its own function
# get path of link (the key used for tracking what's been built)
target_path = link_type.path
@@ -595,7 +595,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
else:
builder = self.__read_group(target_obj, builder_name, ignore=ignore)
self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
link_builder = LinkBuilder(builder=builder, name=k, source=h5obj.file.filename)
link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename))
link_builder.location = h5obj.name
self.__set_written(link_builder)
kwargs['links'][builder_name] = link_builder
@@ -619,7 +619,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
warnings.warn('Path to Group altered/broken at ' + os.path.join(h5obj.name, k), BrokenLinkWarning)
kwargs['datasets'][k] = None
continue
kwargs['source'] = h5obj.file.filename
kwargs['source'] = os.path.abspath(h5obj.file.filename)
ret = GroupBuilder(name, **kwargs)
ret.location = os.path.dirname(h5obj.name)
self.__set_written(ret)
@@ -637,7 +637,7 @@ def __read_dataset(self, h5obj, name=None):

if name is None:
name = str(os.path.basename(h5obj.name))
kwargs['source'] = h5obj.file.filename
kwargs['source'] = os.path.abspath(h5obj.file.filename)
ndims = len(h5obj.shape)
if ndims == 0: # read scalar
scalar = h5obj[()]
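The three hunks above all make the same change: builder `source` values are now recorded as absolute paths. A small sketch (hypothetical file name, not from the commit) of why `h5obj.file.filename` alone is not enough — h5py reports the name the file was opened with, so a relative open yields a relative filename:

```python
import os
import h5py

# Illustrative only: "example.h5" is a throwaway file name.
with h5py.File("example.h5", "w") as f:
    f.create_dataset("data", data=[1, 2, 3])
    print(f.filename)                    # 'example.h5' -- the relative path used at open time
    print(os.path.abspath(f.filename))   # absolute path, which the builders now record

os.remove("example.h5")  # clean up the throwaway file
```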
@@ -1025,13 +1025,13 @@ def write_link(self, **kwargs):
else:
write_source = export_source

if write_source == target_builder.source:
parent_filename = os.path.abspath(parent.file.filename)
if target_builder.source in (write_source, parent_filename):
link_obj = SoftLink(path)
self.logger.debug(" Creating SoftLink '%s/%s' to '%s'"
% (parent.name, name, link_obj.path))
elif target_builder.source is not None:
target_filename = os.path.abspath(target_builder.source)
parent_filename = os.path.abspath(parent.file.filename)
relative_path = os.path.relpath(target_filename, os.path.dirname(parent_filename))
if target_builder.location is not None:
path = target_builder.location + "/" + target_builder.name
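Condensed, the new branch in `write_link` decides between link types roughly as in the sketch below (function and variable names are mine; the real method also covers targets with no recorded source and builds the in-file path from `target_builder.location` when it is set):

```python
import os
from h5py import SoftLink, ExternalLink

def choose_link(parent_filename, write_source, target_source, path):
    """Simplified paraphrase of the logic above, not the actual HDMF code."""
    parent_abs = os.path.abspath(parent_filename)
    if target_source in (write_source, parent_abs):
        # Target lives in the file being written -> keep the link internal.
        return SoftLink(path)
    # Target lives elsewhere -> external link with a path relative to the parent file.
    target_abs = os.path.abspath(target_source)
    rel = os.path.relpath(target_abs, os.path.dirname(parent_abs))
    return ExternalLink(rel, path)
```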
5 changes: 4 additions & 1 deletion src/hdmf/backends/io.py
@@ -1,4 +1,5 @@
from abc import ABCMeta, abstractmethod
import os
from pathlib import Path

from ..build import BuildManager, GroupBuilder
@@ -15,7 +16,9 @@ class HDMFIO(metaclass=ABCMeta):
def __init__(self, **kwargs):
manager, source = getargs('manager', 'source', kwargs)
if isinstance(source, Path):
source = str(source)
source = source.resolve()
elif isinstance(source, str):
source = os.path.abspath(source)

self.__manager = manager
self.__built = dict()
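Both branches added to `HDMFIO.__init__` leave `source` absolute, whether the caller passed a `pathlib.Path` or a plain string. A quick illustration (hypothetical path; note that `Path.resolve()` additionally resolves symlinks, while `os.path.abspath` does not):

```python
import os
from pathlib import Path

rel = "subdir/data.h5"        # hypothetical relative path
print(Path(rel).resolve())    # absolute Path object (symlinks resolved)
print(os.path.abspath(rel))   # absolute str (no symlink resolution)
```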
95 changes: 94 additions & 1 deletion tests/unit/test_io_hdf5_h5tools.py
@@ -2417,6 +2417,65 @@ def test_soft_link_dataset(self):
# make sure the linked dataset is within the same file
self.assertEqual(read_foofile2.foofile_data.file.filename, self.paths[1])

def test_soft_link_group_modified(self):
"""Test that exporting a written file with soft linked groups keeps links within the file."""
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
foobucket = FooBucket('bucket1', [foo1])
foofile = FooFile(buckets=[foobucket], foo_link=foo1)

with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io:
write_io.write(foofile)

with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io:
read_foofile2 = read_io.read()
read_foofile2.foo_link.set_modified() # trigger a rebuild of foo_link and its parents

with HDF5IO(self.paths[1], mode='w') as export_io:
export_io.export(src_io=read_io, container=read_foofile2)

with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
self.ios.append(read_io) # track IO objects for tearDown
read_foofile2 = read_io.read()

# make sure the linked group is within the same file
self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1])

# make sure the linked group is a soft link
with File(self.paths[1], 'r') as f:
self.assertEqual(f['links/foo_link'].file.filename, self.paths[1])
self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink)

def test_soft_link_group_modified_rel_path(self):
"""Test that exporting a written file with soft linked groups keeps links within the file."""
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
foobucket = FooBucket('bucket1', [foo1])
foofile = FooFile(buckets=[foobucket], foo_link=foo1)
# make temp files in relative path location
self.paths[0] = os.path.basename(self.paths[0])
self.paths[1] = os.path.basename(self.paths[1])

with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io:
write_io.write(foofile)

with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io:
read_foofile2 = read_io.read()
read_foofile2.foo_link.set_modified() # trigger a rebuild of foo_link and its parents

with HDF5IO(self.paths[1], mode='w') as export_io:
export_io.export(src_io=read_io, container=read_foofile2)

with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
self.ios.append(read_io) # track IO objects for tearDown
read_foofile2 = read_io.read()

# make sure the linked group is within the same file
self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[1]))

# make sure the linked group is a soft link
with File(self.paths[1], 'r') as f:
self.assertEqual(f['links/foo_link'].file.filename, self.paths[1])
self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink)

def test_external_link_group(self):
"""Test that exporting a written file with external linked groups maintains the links."""
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
@@ -2437,7 +2496,6 @@ def test_external_link_group(self):

with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
self.ios.append(read_io) # track IO objects for tearDown
read_foofile2 = read_io.read()

with HDF5IO(self.paths[2], mode='w') as export_io:
export_io.export(src_io=read_io)
@@ -2449,6 +2507,41 @@ def test_external_link_group(self):
# make sure the linked group is read from the first file
self.assertEqual(read_foofile2.foo_link.container_source, self.paths[0])

def test_external_link_group_rel_path(self):
"""Test that exporting a written file from a relative filepath works."""
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
foobucket = FooBucket('bucket1', [foo1])
foofile = FooFile(buckets=[foobucket])
# make temp files in relative path location
self.paths[0] = os.path.basename(self.paths[0])
self.paths[1] = os.path.basename(self.paths[1])
self.paths[2] = os.path.basename(self.paths[2])

with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as read_io:
read_io.write(foofile)

manager = get_foo_buildmanager()
with HDF5IO(self.paths[0], manager=manager, mode='r') as read_io:
read_foofile = read_io.read()
# make external link to existing group
foofile2 = FooFile(foo_link=read_foofile.buckets['bucket1'].foos['foo1'])

with HDF5IO(self.paths[1], manager=manager, mode='w') as write_io:
write_io.write(foofile2)

with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
self.ios.append(read_io) # track IO objects for tearDown

with HDF5IO(self.paths[2], mode='w') as export_io:
export_io.export(src_io=read_io)

with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io:
self.ios.append(read_io) # track IO objects for tearDown
read_foofile2 = read_io.read()

# make sure the linked group is read from the first file
self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[0]))

def test_external_link_dataset(self):
"""Test that exporting a written file with external linked datasets maintains the links."""
foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)