From 963aea736d1b1fe383707aec293c93860771b3a2 Mon Sep 17 00:00:00 2001
From: Ryan Ly
Date: Thu, 13 Apr 2023 15:02:43 -0700
Subject: [PATCH] Fix external link within file on export (#847)

---
 CHANGELOG.md                       |  4 +-
 src/hdmf/backends/hdf5/h5tools.py  | 14 ++---
 src/hdmf/backends/io.py            |  5 +-
 tests/unit/test_io_hdf5_h5tools.py | 95 +++++++++++++++++++++++++++++-
 4 files changed, 108 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f1d19ea0..28e65d45f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,8 +1,10 @@
 # HDMF Changelog
 
-## HDMF 3.5.5 (April 12, 2023)
+## HDMF 3.5.5 (April 13, 2023)
 
 ### Bug fixes
+- Fixed error during export where an external link to the same file was created in some situations.
+  @rly [#847](https://github.com/hdmf-dev/hdmf/pull/847)
 - Remove unused, deprecated `codecov` package from dev installation requirements. @rly
   [#849](https://github.com/hdmf-dev/hdmf/pull/849)
 - Fix export with `'link_data': False'` not copying datasets in some situations. @rly
diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py
index 6f78d95e6..f4b93216a 100644
--- a/src/hdmf/backends/hdf5/h5tools.py
+++ b/src/hdmf/backends/hdf5/h5tools.py
@@ -414,7 +414,7 @@ def export(self, **kwargs):
             raise UnsupportedOperation("Cannot export from non-HDF5 backend %s to HDF5 with write argument "
                                        "link_data=True." % src_io.__class__.__name__)
 
-        write_args['export_source'] = src_io.source  # pass export_source=src_io.source to write_builder
+        write_args['export_source'] = os.path.abspath(src_io.source) if src_io.source is not None else None
         ckwargs = kwargs.copy()
         ckwargs['write_args'] = write_args
         if not write_args.get('link_data', True):
@@ -580,7 +580,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
             if sub_h5obj.name in ignore:
                 continue
             link_type = h5obj.get(k, getlink=True)
-            if isinstance(link_type, SoftLink) or isinstance(link_type, ExternalLink):
+            if isinstance(link_type, (SoftLink, ExternalLink)):
                 # Reading links might be better suited in its own function
                 # get path of link (the key used for tracking what's been built)
                 target_path = link_type.path
@@ -595,7 +595,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
                 else:
                     builder = self.__read_group(target_obj, builder_name, ignore=ignore)
                     self.__set_built(sub_h5obj.file.filename, target_obj.id, builder)
-                link_builder = LinkBuilder(builder=builder, name=k, source=h5obj.file.filename)
+                link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename))
                 link_builder.location = h5obj.name
                 self.__set_written(link_builder)
                 kwargs['links'][builder_name] = link_builder
@@ -619,7 +619,7 @@ def __read_group(self, h5obj, name=None, ignore=set()):
                     warnings.warn('Path to Group altered/broken at ' + os.path.join(h5obj.name, k), BrokenLinkWarning)
                     kwargs['datasets'][k] = None
                     continue
-        kwargs['source'] = h5obj.file.filename
+        kwargs['source'] = os.path.abspath(h5obj.file.filename)
         ret = GroupBuilder(name, **kwargs)
         ret.location = os.path.dirname(h5obj.name)
         self.__set_written(ret)
@@ -637,7 +637,7 @@ def __read_dataset(self, h5obj, name=None):
 
         if name is None:
             name = str(os.path.basename(h5obj.name))
-        kwargs['source'] = h5obj.file.filename
+        kwargs['source'] = os.path.abspath(h5obj.file.filename)
         ndims = len(h5obj.shape)
         if ndims == 0:  # read scalar
             scalar = h5obj[()]
@@ -1025,13 +1025,13 @@ def write_link(self, **kwargs):
         else:
             write_source = export_source
 
-        if write_source == target_builder.source:
+        parent_filename = os.path.abspath(parent.file.filename)
+        if target_builder.source in (write_source, parent_filename):
             link_obj = SoftLink(path)
             self.logger.debug("    Creating SoftLink '%s/%s' to '%s'"
                               % (parent.name, name, link_obj.path))
         elif target_builder.source is not None:
             target_filename = os.path.abspath(target_builder.source)
-            parent_filename = os.path.abspath(parent.file.filename)
             relative_path = os.path.relpath(target_filename, os.path.dirname(parent_filename))
             if target_builder.location is not None:
                 path = target_builder.location + "/" + target_builder.name
diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py
index 0db5e81c9..7d9e6a660 100644
--- a/src/hdmf/backends/io.py
+++ b/src/hdmf/backends/io.py
@@ -1,4 +1,5 @@
 from abc import ABCMeta, abstractmethod
+import os
 from pathlib import Path
 
 from ..build import BuildManager, GroupBuilder
@@ -15,7 +16,9 @@ class HDMFIO(metaclass=ABCMeta):
     def __init__(self, **kwargs):
         manager, source = getargs('manager', 'source', kwargs)
         if isinstance(source, Path):
-            source = str(source)
+            source = source.resolve()
+        elif isinstance(source, str):
+            source = os.path.abspath(source)
 
         self.__manager = manager
         self.__built = dict()
diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py
index 3951198ee..a66a5a086 100644
--- a/tests/unit/test_io_hdf5_h5tools.py
+++ b/tests/unit/test_io_hdf5_h5tools.py
@@ -2417,6 +2417,65 @@ def test_soft_link_dataset(self):
         # make sure the linked dataset is within the same file
         self.assertEqual(read_foofile2.foofile_data.file.filename, self.paths[1])
 
+    def test_soft_link_group_modified(self):
+        """Test that exporting a written file with soft linked groups keeps links within the file."""
+        foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
+        foobucket = FooBucket('bucket1', [foo1])
+        foofile = FooFile(buckets=[foobucket], foo_link=foo1)
+
+        with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io:
+            write_io.write(foofile)
+
+        with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io:
+            read_foofile2 = read_io.read()
+            read_foofile2.foo_link.set_modified()  # trigger a rebuild of foo_link and its parents
+
+            with HDF5IO(self.paths[1], mode='w') as export_io:
+                export_io.export(src_io=read_io, container=read_foofile2)
+
+        with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
+            self.ios.append(read_io)  # track IO objects for tearDown
+            read_foofile2 = read_io.read()
+
+            # make sure the linked group is within the same file
+            self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1])
+
+        # make sure the linked group is a soft link
+        with File(self.paths[1], 'r') as f:
+            self.assertEqual(f['links/foo_link'].file.filename, self.paths[1])
+            self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink)
+
+    def test_soft_link_group_modified_rel_path(self):
+        """Test that exporting a written file with soft linked groups keeps links within the file."""
+        foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
+        foobucket = FooBucket('bucket1', [foo1])
+        foofile = FooFile(buckets=[foobucket], foo_link=foo1)
+        # make temp files in relative path location
+        self.paths[0] = os.path.basename(self.paths[0])
+        self.paths[1] = os.path.basename(self.paths[1])
+
+        with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io:
+            write_io.write(foofile)
+
+        with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io:
+            read_foofile2 = read_io.read()
+            read_foofile2.foo_link.set_modified()  # trigger a rebuild of foo_link and its parents
+
+            with HDF5IO(self.paths[1], mode='w') as export_io:
+                export_io.export(src_io=read_io, container=read_foofile2)
+
+        with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
+            self.ios.append(read_io)  # track IO objects for tearDown
+            read_foofile2 = read_io.read()
+
+            # make sure the linked group is within the same file
+            self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[1]))
+
+        # make sure the linked group is a soft link
+        with File(self.paths[1], 'r') as f:
+            self.assertEqual(f['links/foo_link'].file.filename, self.paths[1])
+            self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink)
+
     def test_external_link_group(self):
         """Test that exporting a written file with external linked groups maintains the links."""
         foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
@@ -2437,7 +2496,6 @@ def test_external_link_group(self):
 
         with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
             self.ios.append(read_io)  # track IO objects for tearDown
-            read_foofile2 = read_io.read()
 
             with HDF5IO(self.paths[2], mode='w') as export_io:
                 export_io.export(src_io=read_io)
@@ -2449,6 +2507,41 @@ def test_external_link_group(self):
         # make sure the linked group is read from the first file
         self.assertEqual(read_foofile2.foo_link.container_source, self.paths[0])
 
+    def test_external_link_group_rel_path(self):
+        """Test that exporting a written file from a relative filepath works."""
+        foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
+        foobucket = FooBucket('bucket1', [foo1])
+        foofile = FooFile(buckets=[foobucket])
+        # make temp files in relative path location
+        self.paths[0] = os.path.basename(self.paths[0])
+        self.paths[1] = os.path.basename(self.paths[1])
+        self.paths[2] = os.path.basename(self.paths[2])
+
+        with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as read_io:
+            read_io.write(foofile)
+
+        manager = get_foo_buildmanager()
+        with HDF5IO(self.paths[0], manager=manager, mode='r') as read_io:
+            read_foofile = read_io.read()
+            # make external link to existing group
+            foofile2 = FooFile(foo_link=read_foofile.buckets['bucket1'].foos['foo1'])
+
+            with HDF5IO(self.paths[1], manager=manager, mode='w') as write_io:
+                write_io.write(foofile2)
+
+        with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io:
+            self.ios.append(read_io)  # track IO objects for tearDown
+
+            with HDF5IO(self.paths[2], mode='w') as export_io:
+                export_io.export(src_io=read_io)
+
+        with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io:
+            self.ios.append(read_io)  # track IO objects for tearDown
+            read_foofile2 = read_io.read()
+
+            # make sure the linked group is read from the first file
+            self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[0]))
+
     def test_external_link_dataset(self):
         """Test that exporting a written file with external linked datasets maintains the links."""
         foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
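
Example (an illustrative sketch, not part of the patch): the minimal scenario this change fixes, reusing the Foo/FooBucket/FooFile fixtures and get_foo_buildmanager() defined in tests/unit/test_io_hdf5_h5tools.py; the file names below are hypothetical. Before this change, write_link compared the (possibly relative) builder source directly against the export source, so exporting a file that was opened via a relative path could rewrite a within-file soft link as an ExternalLink pointing back at the source file. With HDMFIO normalizing `source` to an absolute path and write_link also comparing against os.path.abspath(parent.file.filename), the link stays a SoftLink.

    import h5py
    from hdmf.backends.hdf5 import HDF5IO

    # hypothetical file names for illustration
    src_path = "export_src.h5"
    dst_path = "export_dst.h5"

    # fixtures as used in the tests above
    foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14)
    foofile = FooFile(buckets=[FooBucket('bucket1', [foo1])], foo_link=foo1)

    with HDF5IO(src_path, manager=get_foo_buildmanager(), mode='w') as write_io:
        write_io.write(foofile)

    with HDF5IO(src_path, manager=get_foo_buildmanager(), mode='r') as read_io:
        container = read_io.read()
        container.foo_link.set_modified()  # trigger a rebuild of foo_link and its parents
        with HDF5IO(dst_path, mode='w') as export_io:
            export_io.export(src_io=read_io, container=container)

    with h5py.File(dst_path, 'r') as f:
        # with the fix, the exported link is a SoftLink resolving within
        # dst_path, not an ExternalLink back to src_path
        assert isinstance(f.get('links/foo_link', getlink=True), h5py.SoftLink)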