From 8326872c40ec41cdbd4e5ca82ae40aa4abd47b86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= <hampus.nasstrom@gmail.com>
Date: Fri, 8 Dec 2023 16:40:02 +0100
Subject: [PATCH] Modified XRD Reading and Writing Process (#28)

* Modified XRD Reading and Writing Process

Added method for getting read and write functions

Added empty NeXus read and write functions

* Fixed Typo

* updating nexus reader and writer.

* Populate nexus section.

* Including nexus reader.

* update

* update.

* fixing plot.

* fixing plot.

* Changed to Call Read and Write Every Normalize

Added utils function for merging sections

Changed to call file read and write every time

Added merging of read data into self

Linting

* Commented Out NeXus Reader Integration

* Changed to Not Warn For Empty Update SubSections

* Linting

* Added Quotes on Type Hinted BoundLogger

* Removed read_xrd function

Added .brml to get_read_write_functions method

* Removed call to missing read or write  function

---------

Co-authored-by: Rubel <rubel.mozumder@outlook.com>
---
 src/nomad_measurements/utils.py       |  72 ++++++++-
 src/nomad_measurements/xrd/readers.py |  46 +++---
 src/nomad_measurements/xrd/schema.py  | 211 ++++++++++++++++++++++++--
 tests/test_utils.py                   |  53 +++++++
 4 files changed, 343 insertions(+), 39 deletions(-)
 create mode 100644 tests/test_utils.py

diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py
index 984132b6..8e2d1196 100644
--- a/src/nomad_measurements/utils.py
+++ b/src/nomad_measurements/utils.py
@@ -15,18 +15,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from typing import (
+    TYPE_CHECKING,
+)
+if TYPE_CHECKING:
+    from nomad.datamodel.data import (
+        ArchiveSection,
+    )
+    from nomad.datamodel.datamodel import (
+        EntryArchive,
+    )
+    from structlog.stdlib import (
+        BoundLogger,
+    )
 
 
-def get_reference(upload_id, entry_id):
+def get_reference(upload_id: str, entry_id: str) -> str:
     return f'../uploads/{upload_id}/archive/{entry_id}#data'
 
 
-def get_entry_id_from_file_name(file_name, archive):
+def get_entry_id_from_file_name(file_name: str, archive: 'EntryArchive') -> str:
     from nomad.utils import hash
     return hash(archive.metadata.upload_id, file_name)
 
 
-def create_archive(entity, archive, file_name) -> str:
+def create_archive(
+        entity: 'ArchiveSection',
+        archive: 'EntryArchive',
+        file_name: str,
+    ) -> str:
     import json
     from nomad.datamodel.context import ClientContext
     if isinstance(archive.m_context, ClientContext):
@@ -40,3 +57,52 @@ def create_archive(entity, archive, file_name) -> str:
         archive.metadata.upload_id,
         get_entry_id_from_file_name(file_name, archive)
     )
+
+
+def merge_sections(
+        section: 'ArchiveSection',
+        update: 'ArchiveSection',
+        logger: 'BoundLogger'=None,
+    ) -> None:
+    '''
+    Merge the set quantities and sub sections of `update` into `section`, in place.
+    Values already set on `section` are kept; differences are reported as warnings.
+    Raises `TypeError` if `section` is not an instance of the type of `update`.
+    '''
+    # Without a logger (e.g. in tests) the warnings are printed to stdout.
+    warn = logger.warning if logger else print
+    if update is None or section is None:  # nothing to merge from, or no target to fill
+        return
+    if not isinstance(section, type(update)):
+        raise TypeError(
+            f'Cannot merge sections of different types: {type(section)} and {type(update)}'
+        )
+    for name, quantity in update.m_def.all_quantities.items():
+        if not update.m_is_set(quantity):
+            continue
+        if not section.m_is_set(quantity):
+            section.m_set(quantity, update.m_get(quantity))
+            continue
+        try:
+            differs = section.m_get(quantity) != update.m_get(quantity)
+            # Array comparisons are element-wise and have to be reduced to one flag.
+            differs = differs.any() if hasattr(differs, 'any') else differs
+        except ValueError:  # e.g. arrays whose shapes cannot be compared
+            differs = True
+        if differs:
+            warn(f'Merging sections with different values for quantity "{name}".')
+    for name, sub_section_def in update.m_def.all_sub_sections.items():
+        count = section.m_sub_section_count(sub_section_def)
+        update_count = update.m_sub_section_count(sub_section_def)
+        if count == 0:
+            for update_sub_section in update.m_get_sub_sections(sub_section_def):
+                section.m_add_sub_section(sub_section_def, update_sub_section)
+        elif count == update_count:
+            for i in range(count):
+                merge_sections(
+                    section.m_get_sub_section(sub_section_def, i),
+                    update.m_get_sub_section(sub_section_def, i),
+                    logger,
+                )
+        elif update_count > 0:
+            warn(f'Merging sections with different number of "{name}" sub sections.')
diff --git a/src/nomad_measurements/xrd/readers.py b/src/nomad_measurements/xrd/readers.py
index 5ba5937b..3059cae0 100644
--- a/src/nomad_measurements/xrd/readers.py
+++ b/src/nomad_measurements/xrd/readers.py
@@ -20,16 +20,23 @@
 from typing import (
     Dict,
     Any,
+    TYPE_CHECKING
 )
 import numpy as np
-from structlog.stdlib import (
-    BoundLogger,
-)
 from nomad.units import ureg
+# from pynxtools.dataconverter.convert import transfer_data_into_template
 from nomad_measurements.xrd.IKZ import RASXfile, BRMLfile
 
+if TYPE_CHECKING:
+    from structlog.stdlib import (
+        BoundLogger,
+    )
+
+
+def transfer_data_into_template(**kwargs):  # stand-in for the commented-out pynxtools import
+    raise NotImplementedError  # any call (e.g. from read_nexus_xrd) fails until it is replaced
 
-def read_panalytical_xrdml(file_path: str, logger: BoundLogger=None) -> Dict[str, Any]:
+def read_panalytical_xrdml(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]:
     '''
     Function for reading the X-ray diffraction data in a Panalytical `.xrdml` file.
 
@@ -160,7 +167,7 @@ def find_string(path):
     }
 
 
-def read_rigaku_rasx(file_path: str, logger: BoundLogger=None) -> Dict[str, Any]:
+def read_rigaku_rasx(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]:
     '''
     Reads .rasx files from Rigaku instruments
         - reader is based on IKZ module
@@ -235,7 +242,7 @@ def set_quantity(value: Any=None, unit: str=None) -> Any:
 
     return output
 
-def read_bruker_brml(file_path: str, logger: BoundLogger=None) -> Dict[str, Any]:
+def read_bruker_brml(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]:
     '''
     Reads .brml files from Bruker instruments
         - reader is based on IKZ module
@@ -291,24 +298,21 @@ def set_quantity(value: Any=None, unit: str=None) -> Any:
 
     return output
 
-
-def read_xrd(file_path: str, logger: BoundLogger) -> Dict[str, Any]:
+def read_nexus_xrd(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]:
     '''
-    Function for reading an XRD file.
+    Function for reading the X-ray diffraction data in a Nexus file.
 
     Args:
-        file_path (str): The path of the file to be read.
-        logger (BoundLogger): A structlog logger.
+        file_path (str): The path to the X-ray diffraction data file.
+        logger (BoundLogger, optional): A structlog logger. Defaults to None.
 
     Returns:
-        dict: The parsed and converted data in a common dictionary format.
+        Dict[str, Any]: The X-ray diffraction data in a Python dictionary.
     '''
-    file_path = os.path.abspath(file_path)
-
-    if file_path.endswith('.xrdml'):
-        return read_panalytical_xrdml(file_path, logger)
-    if file_path.endswith('.rasx'):
-        return read_rigaku_rasx(file_path, logger)
-    if file_path.endswith('.brml'):
-        return read_bruker_brml(file_path,logger)
-    raise ValueError(f'Unsupported file format: {file_path.split(".")[-1]}')
+    nxdl_name = 'NXxrd_pan'
+    xrd_template = transfer_data_into_template(
+        nxdl_name=nxdl_name,
+        input_file=file_path,
+        reader='xrd',
+    )
+    return xrd_template
diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py
index d6b338d5..caa2cfa4 100644
--- a/src/nomad_measurements/xrd/schema.py
+++ b/src/nomad_measurements/xrd/schema.py
@@ -19,6 +19,7 @@
     TYPE_CHECKING,
     Dict,
     Any,
+    Callable,
 )
 import numpy as np
 import plotly.express as px
@@ -57,13 +58,12 @@
     PlotSection,
     PlotlyFigure,
 )
-
+# from nomad.datamodel.metainfo.eln.nexus_data_converter import populate_nexus_subsection
 from nomad_measurements import (
     NOMADMeasurementsCategory,
 )
-from nomad_measurements.xrd.readers import (
-    read_xrd,
-)
+from nomad_measurements.xrd import readers
+from nomad_measurements.utils import merge_sections
 
 if TYPE_CHECKING:
     from nomad.datamodel.datamodel import (
@@ -73,10 +73,52 @@
         BoundLogger,
     )
     import pint
+    from pynxtools.dataconverter.template import Template
 
 m_package = Package(name='nomad_xrd')
 
 
+def populate_nexus_subsection(**kwargs):  # stand-in for the commented-out nomad import
+    raise NotImplementedError  # any call (e.g. from handle_nexus_subsection) fails for now
+
+def handle_nexus_subsection(
+        xrd_template: 'Template',
+        nexus_out: str,
+        archive: 'EntryArchive',
+        logger: 'BoundLogger'
+    ):
+    '''
+    Hand an XRD NeXus template over to `populate_nexus_subsection`.
+
+    With an output file name, the `.nxs` extension is appended if it is missing
+    and the call is made with `on_temp_file=False`. Without one, the (falsy) name
+    is passed through unchanged and `on_temp_file=True` is used instead.
+
+    Args:
+        xrd_template (Template): The xrd data in a NeXus Template.
+        nexus_out (str): The name of the optional NeXus output file.
+        archive (EntryArchive): The archive containing the section.
+        logger (BoundLogger): A structlog logger.
+    '''
+    nxdl_name = 'NXxrd_pan'
+    # A falsy name (None or '') means that no output file was requested.
+    write_to_file = bool(nexus_out)
+    output_path = nexus_out
+    if write_to_file and not output_path.endswith('.nxs'):
+        # Make sure the requested file carries the NeXus extension.
+        output_path = output_path + '.nxs'
+
+    # Both cases share every argument except the temp-file switch.
+    populate_nexus_subsection(
+        template=xrd_template,
+        app_def=nxdl_name,
+        archive=archive,
+        logger=logger,
+        output_file_path=output_path,
+        on_temp_file=not write_to_file,
+    )
+
+
 def calculate_two_theta_or_q(
         wavelength: 'pint.Quantity',
         q: 'pint.Quantity'=None,
@@ -141,7 +183,6 @@ class XRayTubeSource(ArchiveSection):
     xray_tube_material = Quantity(
         type=MEnum(sorted(['Cu', 'Cr', 'Mo', 'Fe', 'Ag', 'In', 'Ga'])),
         description='Type of the X-ray tube',
-        default='Cu',
         a_eln=ELNAnnotation(
             component=ELNComponentEnum.EnumEditQuantity,
         ),
@@ -395,6 +436,7 @@ class ELNXRayDiffraction(XRayDiffraction, PlotSection, EntryData):
         label='X-Ray Diffraction (XRD)',
         a_eln=ELNAnnotation(
             lane_width='800px',
+            hide=['generate_nexus_file'],
         ),
         a_template={
             'measurement_identifiers': {},
@@ -414,6 +456,29 @@ class ELNXRayDiffraction(XRayDiffraction, PlotSection, EntryData):
     diffraction_method_name.m_annotations['eln'] = ELNAnnotation(
         component=ELNComponentEnum.EnumEditQuantity,
     )
+    generate_nexus_file = Quantity(
+        type=bool,
+        description='Whether or not to generate a NeXus output file (if possible).',
+        a_eln=ELNAnnotation(
+            component=ELNComponentEnum.BoolEditQuantity,
+            label='Generate NeXus file',
+        ),
+    )
+
+    def get_read_write_functions(self) -> tuple[Callable, Callable]:
+        '''
+        Pick the read and write functions matching the extension of the data file.
+        Returns `(None, None)` when the extension is not supported.
+        '''
+        read_functions = {
+            '.rasx': readers.read_rigaku_rasx,
+            '.xrdml': readers.read_panalytical_xrdml,
+            '.brml': readers.read_bruker_brml,
+        }
+        for extension, read_function in read_functions.items():
+            if self.data_file.endswith(extension):
+                return read_function, self.write_xrd_data
+        return None, None
 
     def write_xrd_data(
             self,
@@ -464,9 +529,120 @@ def write_xrd_data(
         )
         sample.normalize(archive, logger)
 
-        self.results = [result]
-        self.xrd_settings = xrd_settings
-        self.samples = [sample]
+        xrd = ELNXRayDiffraction(
+            results = [result],
+            xrd_settings = xrd_settings,
+            samples = [sample],
+        )
+        merge_sections(self, xrd, logger)
+
+    def write_nx_xrd(
+            self,
+            xrd_dict: 'Template',
+            archive: 'EntryArchive',
+            logger: 'BoundLogger',
+        ) -> None:
+        '''
+        Populate `ELNXRayDiffraction` section from a NeXus Template.
+
+        The values found in the template are collected in a temporary
+        `ELNXRayDiffraction` section that is merged into `self`. Afterwards the
+        template is handed to `handle_nexus_subsection`, with an output file name
+        derived from the mainfile if `generate_nexus_file` is set.
+
+        Args:
+            xrd_dict (Template): The XRD data in a NeXus Template.
+            archive (EntryArchive): The archive containing the section.
+            logger (BoundLogger): A structlog logger.
+        '''
+        result = XRDResult(
+            intensity=xrd_dict.get(
+                '/ENTRY[entry]/2theta_plot/intensity',
+                None,
+            ),
+            two_theta=xrd_dict.get(
+                '/ENTRY[entry]/2theta_plot/two_theta',
+                None,
+            ),
+            omega=xrd_dict.get(
+                '/ENTRY[entry]/2theta_plot/omega',
+                None,
+            ),
+            chi=xrd_dict.get(
+                '/ENTRY[entry]/2theta_plot/chi',
+                None,
+            ),
+            phi=xrd_dict.get(
+                '/ENTRY[entry]/2theta_plot/phi',
+                None,
+            ),
+            scan_axis=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis',
+                None,
+            ),
+            integration_time=xrd_dict.get(
+                '/ENTRY[entry]/COLLECTION[collection]/count_time',
+                None,
+            ),
+        )
+        result.normalize(archive, logger)
+
+        source = XRayTubeSource(
+            xray_tube_material=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material',
+                None,
+            ),
+            kalpha_one=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one',
+                None,
+            ),
+            kalpha_two=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two',
+                None,
+            ),
+            ratio_kalphatwo_kalphaone=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone',
+                None,
+            ),
+            kbeta=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta',
+                None,
+            ),
+            xray_tube_voltage=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage',
+                None,
+            ),
+            xray_tube_current=xrd_dict.get(
+                '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current',
+                None,
+            ),
+        )
+        source.normalize(archive, logger)
+
+        xrd_settings = XRDSettings(source=source)
+        xrd_settings.normalize(archive, logger)
+
+        sample = CompositeSystemReference(
+            lab_id=xrd_dict.get(
+                '/ENTRY[entry]/SAMPLE[sample]/sample_id',
+                None,
+            ),
+        )
+        sample.normalize(archive, logger)
+
+        # Merge through a temporary section instead of assigning onto self directly.
+        xrd = ELNXRayDiffraction(
+            results=[result],
+            xrd_settings=xrd_settings,
+            samples=[sample],
+        )
+        merge_sections(self, xrd, logger)
+
+        nexus_output = None
+        if self.generate_nexus_file:
+            archive_name = archive.metadata.mainfile.split('.')[0]
+            nexus_output = f'{archive_name}_output.nxs'
+        handle_nexus_subsection(xrd_dict, nexus_output, archive, logger)
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         '''
@@ -477,10 +653,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             normalized.
             logger (BoundLogger): A structlog logger.
         '''
-        if not self.results and self.data_file is not None:
-            with archive.m_context.raw_file(self.data_file) as file:
-                xrd_dict = read_xrd(file.name, logger)
-            self.write_xrd_data(xrd_dict, archive, logger)
+        if self.data_file is not None:
+            read_function, write_function = self.get_read_write_functions()
+            if read_function is None or write_function is None:
+                logger.warn(
+                    f'No compatible reader found for the file: "{self.data_file}".'
+                )
+            else:
+                with archive.m_context.raw_file(self.data_file) as file:
+                    xrd_dict = read_function(file.name, logger)
+                write_function(xrd_dict, archive, logger)
         super().normalize(archive, logger)
 
         if not self.results:
@@ -507,16 +689,15 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         )
         self.figures.extend([
             PlotlyFigure(
-                label="Log Plot",
+                label='Log Plot',
                 index=1,
                 figure=line_log.to_plotly_json(),
             ),
             PlotlyFigure(
-                label="Linear Plot",
+                label='Linear Plot',
                 index=2,
                 figure=line_linear.to_plotly_json(),
             ),
         ])
 
-
 m_package.__init_metainfo__()
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 00000000..ae7198ba
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,53 @@
+from nomad.datamodel.metainfo.basesections import (
+    Component,
+    CompositeSystem,
+    PureSubstanceComponent,
+    PureSubstanceSection,
+)
+from nomad_measurements.utils import merge_sections
+
+def test_merge_sections():  # exercises merge_sections on quantities and nested sub sections
+    component_1 = Component(  # has no name; expected to receive it from component_2
+        mass_fraction=1,
+    )
+    component_2 = Component(
+        name='Cu',
+        mass_fraction=1,
+    )
+    substance_1 = PureSubstanceSection(  # only `name` set
+        name='Cu',
+    )
+    substance_2 = PureSubstanceSection(  # only `iupac_name` set
+        iupac_name='Copper',
+    )
+    component_3 = PureSubstanceComponent(  # name differs from component_4 below
+        name='Cu',
+        pure_substance=substance_1,
+    )
+    component_4 = PureSubstanceComponent(
+        name='Fe',
+        pure_substance=substance_2,
+    )
+    component_5 = Component()  # nothing set; expected to be filled from component_6
+    component_6 = Component(
+        name='Fe',
+    )
+    system_1 = CompositeSystem(  # merge target
+        components=[component_1, component_3, component_5],
+    )
+    system_2 = CompositeSystem(  # source of the updates, merged pairwise by index
+        components=[component_2, component_4, component_6],
+    )
+    system_3 = CompositeSystem()  # no components; expected to receive those of system_2
+    merge_sections(system_1, system_2)  # no logger is passed here
+    assert system_1.components[0].mass_fraction == 1
+    assert system_1.components[0].name == 'Cu'  # unset value was filled in
+    assert system_1.components[1].name == 'Cu'  # differing value of the target is kept
+    assert system_1.components[1].pure_substance.name == 'Cu'
+    assert system_1.components[1].pure_substance.iupac_name == 'Copper'  # nested merge
+    assert system_1.components[2].name == 'Fe'
+    merge_sections(system_3, system_2)
+    assert system_3.components[0].name == 'Cu'
+
+if __name__ == '__main__':
+    test_merge_sections()