From f57c718877226959ab3cde76cb05b45d80012b52 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:25:21 -0700 Subject: [PATCH 1/3] Update file annotation store process to require filename be present in order to annotate file --- schematic/store/synapse.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 4c1b900a8..973bdabcc 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -2266,8 +2266,19 @@ async def add_annotations_to_entities_files( """ - # Expected behavior is to annotate files if `Filename` is present and if file_annotations_upload is set to True regardless of `-mrt` setting - if "filename" in [col.lower() for col in manifest.columns]: + filename_column_present = False + filename_column = None + + # Find the exact column name for 'Filename' + for col in manifest.columns: + if col.lower() == "filename": + filename_column_present = True + filename_column = col + break + + # Expected behavior is to annotate files if `Filename` is present and if + # file_annotations_upload is set to True regardless of `-mrt` setting + if filename_column_present: # get current list of files and store as dataframe dataset_files = self.getFilesInStorageDataset(datasetId) files_and_entityIds = self._get_file_entityIds( @@ -2295,6 +2306,9 @@ async def add_annotations_to_entities_files( row["entityId"] = manifest_synapse_table_id manifest.loc[idx, "entityId"] = manifest_synapse_table_id entityId = "" + elif filename_column_present and not row[filename_column]: + # Skip any attempt to annotate a row which does not have a filename + entityId = "" else: # get the file id of the file to annotate, collected in above step. 
entityId = row["entityId"] From e832d0266c748a1c3737d83a2f1cf9fdd141027b Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:53:20 -0700 Subject: [PATCH 2/3] Revert "Update file annotation store process to require filename be present in order to annotate file" This reverts commit f57c718877226959ab3cde76cb05b45d80012b52. --- schematic/store/synapse.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 973bdabcc..4c1b900a8 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -2266,19 +2266,8 @@ async def add_annotations_to_entities_files( """ - filename_column_present = False - filename_column = None - - # Find the exact column name for 'Filename' - for col in manifest.columns: - if col.lower() == "filename": - filename_column_present = True - filename_column = col - break - - # Expected behavior is to annotate files if `Filename` is present and if - # file_annotations_upload is set to True regardless of `-mrt` setting - if filename_column_present: + # Expected behavior is to annotate files if `Filename` is present and if file_annotations_upload is set to True regardless of `-mrt` setting + if "filename" in [col.lower() for col in manifest.columns]: # get current list of files and store as dataframe dataset_files = self.getFilesInStorageDataset(datasetId) files_and_entityIds = self._get_file_entityIds( @@ -2306,9 +2295,6 @@ async def add_annotations_to_entities_files( row["entityId"] = manifest_synapse_table_id manifest.loc[idx, "entityId"] = manifest_synapse_table_id entityId = "" - elif filename_column_present and not row[filename_column]: - # Skip any attempt to annotate a row which does not have a filename - entityId = "" else: # get the file id of the file to annotate, collected in above step. 
entityId = row["entityId"] From 11be01a755d0a32fc68a83f10bde3077222e5d11 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:55:06 -0700 Subject: [PATCH 3/3] Don't attempt to annotate the table --- schematic/store/synapse.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 4c1b900a8..9caffca13 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -2250,7 +2250,10 @@ async def add_annotations_to_entities_files( manifest_synapse_table_id="", annotation_keys: str = "class_label", ): - """Depending on upload type add Ids to entityId row. Add anotations to connected files. + """ + Depending on upload type add Ids to entityId row. Add annotations to connected + files and folders. Despite the name of this function, it also applies to folders. + Args: dmge: DataModelGraphExplorer Object manifest (pd.DataFrame): loaded df containing user supplied data. @@ -2295,6 +2298,9 @@ async def add_annotations_to_entities_files( row["entityId"] = manifest_synapse_table_id manifest.loc[idx, "entityId"] = manifest_synapse_table_id entityId = "" + # If the row is the manifest table, do not add annotations + elif row["entityId"] == manifest_synapse_table_id: + entityId = "" else: # get the file id of the file to annotate, collected in above step. entityId = row["entityId"]