Added control logic to instruct the dataconverter to create the output file based on an existing input file, to which the template data are then appended.

This functionality is useful in cases where a scientific software has already generated a NeXus file and only some additional pieces of information are missing for the injection into the RDM. Examples of such missing information are users, samples, project information, etc. This proof-of-concept implementation copies that input file, then opens the copy as the output file and appends the template data to it. The current implementation does not, however, verify the NeXus content of this input file. In the future, such a verification could be useful. The open question is when and how to verify: right after the copy? After the template data were written? Or by first loading all input file content into the template and verifying it as usual prior to writing to disk? While the latter idea enables overwriting content from input files, its disadvantage is that the template data might become too costly (with respect to memory demands, irrespective of whether on the client or the server side).
FAIRmat-NFDI · Aug 17, 2023 · e1bb68b · e1bb68b
1 parent 71371c7
commit e1bb68b
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 1 deletion.
diff --git a/processed.nxs.mtex.zip b/processed.nxs.mtex.zip
diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py
@@ -22,12 +22,14 @@
 import logging
 import os
 import sys
+from shutil import copyfile
 from typing import List, Tuple
 import xml.etree.ElementTree as ET
 
 import click
 import yaml
 
+
 from pynxtools.dataconverter.readers.base.reader import BaseReader
 from pynxtools.dataconverter import helpers
 from pynxtools.dataconverter.writer import Writer
@@ -63,6 +65,40 @@ def get_names_of_all_readers() -> List[str]:
     return all_readers
 
 
+def append_template_data_to_acopy_of_one_inputfile(input: Tuple[str], output: str):
+    """Helper function to build outputfile based on one inputfile plus template data."""
+    # There are cases in which one of the inputfiles may contain already NeXus content
+    # typically because the scientific software tool generates such a file
+    # matching a specific application definition and thus additional pieces of information
+    # inside the template (e.g. from an ELN) should just be added to that inputfile
+
+    # one may or not in this case demand for a verification of that input file
+    # before continuing, currently we ignore this verification
+    for file_name in input:
+        if file_name[0:file_name.rfind('.')] != output:
+            continue
+        else:
+            print(f"Creating the output {output} based the this input {file_name}\n" \
+                  f"NeXus content in {file_name} is currently not verified !!!")
+            copyfile(file_name, output)
+
+            print(f"Template data will be added to the output {output}...\n" \
+                  f"Only these template data will be verified !!!")
+    # when calling dataconverter with
+    # --input-file processed.nxs.mtex
+    # --output processed.nxs
+    # -- io_mode="r+"
+    # these calls can be executed repetitively as the first step is
+    # the copying operation of *.nxs.mtex to *.nxs and then the access on the *.nxs
+    # file using h5py is then read/write without regeneration
+    # a repeated call has factually the same effect as the dataconverter
+    # used to work i.e. using h5py with "w" would regenerate the *.nxs if already existent
+    # this is a required to assure that repetitive calls of the ELN save function
+    # in NOMAD do not end up with write conflicts on the *.nxs i.e. the output file
+    # when the dataconverter is called
+    return
+
+
 # pylint: disable=too-many-arguments
 def convert(input_file: Tuple[str],
             reader: str,
@@ -125,6 +161,10 @@ def convert(input_file: Tuple[str],
             continue
         logger.warning("The path, %s, is being written but has no documentation.", path)
 
+    if io_mode == "r+":
+        append_template_data_to_acopy_of_one_inputfile(
+            input=input_file, output=output)
+
     Writer(data=data, nxdl_path=nxdl_path, output_path=output, io_mode=io_mode).write()
 
     logger.info("The output file generated: %s", output)

diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py
@@ -63,7 +63,7 @@ def read(self,
         # pylint: disable=duplicate-code
         template.clear()
 
-        debug_id = 2
+        debug_id = 3
         template[f"/ENTRY[entry1]/test{debug_id}"] = f"test{debug_id}"
         # this em_om parser combines multiple sub-parsers
         # so we need the following input: