diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 984132b6..8e2d1196 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -15,18 +15,35 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import ( + TYPE_CHECKING, +) +if TYPE_CHECKING: + from nomad.datamodel.data import ( + ArchiveSection, + ) + from nomad.datamodel.datamodel import ( + EntryArchive, + ) + from structlog.stdlib import ( + BoundLogger, + ) -def get_reference(upload_id, entry_id): +def get_reference(upload_id: str, entry_id: str) -> str: return f'../uploads/{upload_id}/archive/{entry_id}#data' -def get_entry_id_from_file_name(file_name, archive): +def get_entry_id_from_file_name(file_name: str, archive: 'EntryArchive') -> str: from nomad.utils import hash return hash(archive.metadata.upload_id, file_name) -def create_archive(entity, archive, file_name) -> str: +def create_archive( + entity: 'ArchiveSection', + archive: 'EntryArchive', + file_name: str, + ) -> str: import json from nomad.datamodel.context import ClientContext if isinstance(archive.m_context, ClientContext): @@ -40,3 +57,52 @@ def create_archive(entity, archive, file_name) -> str: archive.metadata.upload_id, get_entry_id_from_file_name(file_name, archive) ) + + +def merge_sections( + section: 'ArchiveSection', + update: 'ArchiveSection', + logger: 'BoundLogger'=None, + ) -> None: + if update is None: + return + if section is None: + section = update.m_copy() + return + if not isinstance(section, type(update)): + raise TypeError( + 'Cannot merge sections of different types: ' + f'{type(section)} and {type(update)}' + ) + for name, quantity in update.m_def.all_quantities.items(): + if not update.m_is_set(quantity): + continue + if not section.m_is_set(quantity): + section.m_set(quantity, update.m_get(quantity)) + elif ( + quantity.is_scalar and section.m_get(quantity) != update.m_get(quantity) + or quantity.repeats and (section.m_get(quantity) != update.m_get(quantity)).any() + ): + warning = f'Merging sections with different values for quantity "{name}".' + if logger: + logger.warning(warning) + else: + print(warning) + for name, sub_section_def in update.m_def.all_sub_sections.items(): + count = section.m_sub_section_count(sub_section_def) + if count == 0: + for update_sub_section in update.m_get_sub_sections(sub_section_def): + section.m_add_sub_section(sub_section_def, update_sub_section) + elif count == update.m_sub_section_count(sub_section_def): + for i in range(count): + merge_sections( + section.m_get_sub_section(sub_section_def, i), + update.m_get_sub_section(sub_section_def, i), + logger, + ) + elif update.m_sub_section_count(sub_section_def) > 0: + warning = f'Merging sections with different number of "{name}" sub sections.' + if logger: + logger.warning(warning) + else: + print(warning) diff --git a/src/nomad_measurements/xrd/readers.py b/src/nomad_measurements/xrd/readers.py index 5ba5937b..3059cae0 100644 --- a/src/nomad_measurements/xrd/readers.py +++ b/src/nomad_measurements/xrd/readers.py @@ -20,16 +20,23 @@ from typing import ( Dict, Any, + TYPE_CHECKING ) import numpy as np -from structlog.stdlib import ( - BoundLogger, -) from nomad.units import ureg +# from pynxtools.dataconverter.convert import transfer_data_into_template from nomad_measurements.xrd.IKZ import RASXfile, BRMLfile +if TYPE_CHECKING: + from structlog.stdlib import ( + BoundLogger, + ) + + +def transfer_data_into_template(**kwargs): + raise NotImplementedError -def read_panalytical_xrdml(file_path: str, logger: BoundLogger=None) -> Dict[str, Any]: +def read_panalytical_xrdml(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]: ''' Function for reading the X-ray diffraction data in a Panalytical `.xrdml` file. @@ -160,7 +167,7 @@ def find_string(path): } -def read_rigaku_rasx(file_path: str, logger: BoundLogger=None) -> Dict[str, Any]: +def read_rigaku_rasx(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]: ''' Reads .rasx files from Rigaku instruments - reader is based on IKZ module @@ -235,7 +242,7 @@ def set_quantity(value: Any=None, unit: str=None) -> Any: return output -def read_bruker_brml(file_path: str, logger: BoundLogger=None) -> Dict[str, Any]: +def read_bruker_brml(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]: ''' Reads .brml files from Bruker instruments - reader is based on IKZ module @@ -291,24 +298,21 @@ def set_quantity(value: Any=None, unit: str=None) -> Any: return output - -def read_xrd(file_path: str, logger: BoundLogger) -> Dict[str, Any]: +def read_nexus_xrd(file_path: str, logger: 'BoundLogger'=None) -> Dict[str, Any]: ''' - Function for reading an XRD file. + Function for reading the X-ray diffraction data in a Nexus file. Args: - file_path (str): The path of the file to be read. - logger (BoundLogger): A structlog logger. + file_path (str): The path to the X-ray diffraction data file. + logger (BoundLogger, optional): A structlog logger. Defaults to None. Returns: - dict: The parsed and converted data in a common dictionary format. + Dict[str, Any]: The X-ray diffraction data in a Python dictionary. ''' - file_path = os.path.abspath(file_path) - - if file_path.endswith('.xrdml'): - return read_panalytical_xrdml(file_path, logger) - if file_path.endswith('.rasx'): - return read_rigaku_rasx(file_path, logger) - if file_path.endswith('.brml'): - return read_bruker_brml(file_path,logger) - raise ValueError(f'Unsupported file format: {file_path.split(".")[-1]}') + nxdl_name = 'NXxrd_pan' + xrd_template = transfer_data_into_template( + nxdl_name=nxdl_name, + input_file=file_path, + reader='xrd', + ) + return xrd_template diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index d6b338d5..caa2cfa4 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -19,6 +19,7 @@ TYPE_CHECKING, Dict, Any, + Callable, ) import numpy as np import plotly.express as px @@ -57,13 +58,12 @@ PlotSection, PlotlyFigure, ) - +# from nomad.datamodel.metainfo.eln.nexus_data_converter import populate_nexus_subsection from nomad_measurements import ( NOMADMeasurementsCategory, ) -from nomad_measurements.xrd.readers import ( - read_xrd, -) +from nomad_measurements.xrd import readers +from nomad_measurements.utils import merge_sections if TYPE_CHECKING: from nomad.datamodel.datamodel import ( @@ -73,10 +73,52 @@ BoundLogger, ) import pint + from pynxtools.dataconverter.template import Template m_package = Package(name='nomad_xrd') +def populate_nexus_subsection(**kwargs): + raise NotImplementedError + +def handle_nexus_subsection( + xrd_template: 'Template', + nexus_out: str, + archive: 'EntryArchive', + logger: 'BoundLogger' + ): + ''' + Function for populating the NeXus section from the xrd_template. + + Args: + xrd_template (Template): The xrd data in a NeXus Template. + nexus_out (str): The name of the optional NeXus output file. + archive (EntryArchive): The archive containing the section. + logger (BoundLogger): A structlog logger. + ''' + nxdl_name = 'NXxrd_pan' + if nexus_out: + if not nexus_out.endswith('.nxs'): + nexus_out = nexus_out + '.nxs' + populate_nexus_subsection( + template=xrd_template, + app_def=nxdl_name, + archive=archive, + logger=logger, + output_file_path=nexus_out, + on_temp_file=False, + ) + else: + populate_nexus_subsection( + template=xrd_template, + app_def=nxdl_name, + archive=archive, + logger=logger, + output_file_path=nexus_out, + on_temp_file=True, + ) + + def calculate_two_theta_or_q( wavelength: 'pint.Quantity', q: 'pint.Quantity'=None, @@ -141,7 +183,6 @@ class XRayTubeSource(ArchiveSection): xray_tube_material = Quantity( type=MEnum(sorted(['Cu', 'Cr', 'Mo', 'Fe', 'Ag', 'In', 'Ga'])), description='Type of the X-ray tube', - default='Cu', a_eln=ELNAnnotation( component=ELNComponentEnum.EnumEditQuantity, ), @@ -395,6 +436,7 @@ class ELNXRayDiffraction(XRayDiffraction, PlotSection, EntryData): label='X-Ray Diffraction (XRD)', a_eln=ELNAnnotation( lane_width='800px', + hide=['generate_nexus_file'], ), a_template={ 'measurement_identifiers': {}, @@ -414,6 +456,29 @@ class ELNXRayDiffraction(XRayDiffraction, PlotSection, EntryData): diffraction_method_name.m_annotations['eln'] = ELNAnnotation( component=ELNComponentEnum.EnumEditQuantity, ) + generate_nexus_file = Quantity( + type=bool, + description='Whether or not to generate a NeXus output file (if possible).', + a_eln=ELNAnnotation( + component=ELNComponentEnum.BoolEditQuantity, + label='Generate NeXus file', + ), + ) + + def get_read_write_functions(self) -> tuple[Callable, Callable]: + ''' + Method for getting the correct read and write functions for the current data file. + + Returns: + tuple[Callable, Callable]: The read, write functions. + ''' + if self.data_file.endswith('.rasx'): + return readers.read_rigaku_rasx, self.write_xrd_data + if self.data_file.endswith('.xrdml'): + return readers.read_panalytical_xrdml, self.write_xrd_data + if self.data_file.endswith('.brml'): + return readers.read_bruker_brml, self.write_xrd_data + return None, None def write_xrd_data( self, @@ -464,9 +529,120 @@ def write_xrd_data( ) sample.normalize(archive, logger) - self.results = [result] - self.xrd_settings = xrd_settings - self.samples = [sample] + xrd = ELNXRayDiffraction( + results = [result], + xrd_settings = xrd_settings, + samples = [sample], + ) + merge_sections(self, xrd, logger) + + def write_nx_xrd( + self, + xrd_dict: 'Template', + archive: 'EntryArchive', + logger: 'BoundLogger', + ) -> None: + ''' + Populate `ELNXRayDiffraction` section from a NeXus Template. + + Args: + xrd_dict (Dict[str, Any]): A dictionary with the XRD data. + archive (EntryArchive): The archive containing the section. + logger (BoundLogger): A structlog logger. + ''' + result = XRDResult( + intensity=xrd_dict.get( + '/ENTRY[entry]/2theta_plot/intensity', + None, + ), + two_theta=xrd_dict.get( + '/ENTRY[entry]/2theta_plot/two_theta', + None, + ), + omega=xrd_dict.get( + '/ENTRY[entry]/2theta_plot/omega', + None, + ), + chi=xrd_dict.get( + '/ENTRY[entry]/2theta_plot/chi', + None), + phi=xrd_dict.get( + '/ENTRY[entry]/2theta_plot/phi', + None, + ), + scan_axis=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis', + None, + ), + integration_time=xrd_dict.get( + '/ENTRY[entry]/COLLECTION[collection]/count_time', + None + ), + ) + result.normalize(archive, logger) + + source = XRayTubeSource( + xray_tube_material=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material', + None, + ), + kalpha_one=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one', + None, + ), + kalpha_two=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two', + None, + ), + ratio_kalphatwo_kalphaone=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', + None, + ), + kbeta=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', + None, + ), + xray_tube_voltage=xrd_dict.get( + 'ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage', + None + ), + xray_tube_current=xrd_dict.get( + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current', + None, + ), + ) + source.normalize(archive, logger) + + xrd_settings = XRDSettings( + source=source + ) + xrd_settings.normalize(archive, logger) + + sample = CompositeSystemReference( + lab_id=xrd_dict.get( + '/ENTRY[entry]/SAMPLE[sample]/sample_id', + None, + ), + ) + sample.normalize(archive, logger) + + xrd = ELNXRayDiffraction( + results = [result], + xrd_settings = xrd_settings, + samples = [sample], + ) + merge_sections(self, xrd, logger) + + nexus_output = None + if self.generate_nexus_file: + archive_name = archive.metadata.mainfile.split('.')[0] + nexus_output = f'{archive_name}_output.nxs' + handle_nexus_subsection( + xrd_dict, + nexus_output, + archive, + logger, + ) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): ''' @@ -477,10 +653,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): normalized. logger (BoundLogger): A structlog logger. ''' - if not self.results and self.data_file is not None: - with archive.m_context.raw_file(self.data_file) as file: - xrd_dict = read_xrd(file.name, logger) - self.write_xrd_data(xrd_dict, archive, logger) + if self.data_file is not None: + read_function, write_function = self.get_read_write_functions() + if read_function is None or write_function is None: + logger.warn( + f'No compatible reader found for the file: "{self.data_file}".' + ) + else: + with archive.m_context.raw_file(self.data_file) as file: + xrd_dict = read_function(file.name, logger) + write_function(xrd_dict, archive, logger) super().normalize(archive, logger) if not self.results: @@ -507,16 +689,15 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): ) self.figures.extend([ PlotlyFigure( - label="Log Plot", + label='Log Plot', index=1, figure=line_log.to_plotly_json(), ), PlotlyFigure( - label="Linear Plot", + label='Linear Plot', index=2, figure=line_linear.to_plotly_json(), ), ]) - m_package.__init_metainfo__() diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..ae7198ba --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,53 @@ +from nomad.datamodel.metainfo.basesections import ( + Component, + CompositeSystem, + PureSubstanceComponent, + PureSubstanceSection, +) +from nomad_measurements.utils import merge_sections + +def test_merge_sections(): + component_1 = Component( + mass_fraction=1, + ) + component_2 = Component( + name='Cu', + mass_fraction=1, + ) + substance_1 = PureSubstanceSection( + name='Cu', + ) + substance_2 = PureSubstanceSection( + iupac_name='Copper', + ) + component_3 = PureSubstanceComponent( + name='Cu', + pure_substance=substance_1, + ) + component_4 = PureSubstanceComponent( + name='Fe', + pure_substance=substance_2, + ) + component_5 = Component() + component_6 = Component( + name='Fe', + ) + system_1 = CompositeSystem( + components=[component_1, component_3, component_5], + ) + system_2 = CompositeSystem( + components=[component_2, component_4, component_6], + ) + system_3 = CompositeSystem() + merge_sections(system_1, system_2) + assert system_1.components[0].mass_fraction == 1 + assert system_1.components[0].name == 'Cu' + assert system_1.components[1].name == 'Cu' + assert system_1.components[1].pure_substance.name == 'Cu' + assert system_1.components[1].pure_substance.iupac_name == 'Copper' + assert system_1.components[2].name == 'Fe' + merge_sections(system_3, system_2) + assert system_3.components[0].name == 'Cu' + +if __name__ == '__main__': + test_merge_sections()