diff --git a/dev-requirements.txt b/dev-requirements.txt index 3ba04e52f..71dbce60a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -104,6 +104,7 @@ h5py==3.7.0 # pyfai # pynxtools (pyproject.toml) # silx + # xrayutilities hyperspy==1.7.4 # via # kikuchipy @@ -166,7 +167,9 @@ lazy-object-proxy==1.9.0 llvmlite==0.39.1 # via numba lmfit==1.2.0 - # via pyxem + # via + # pyxem + # xrayutilities locket==1.0.0 # via partd markupsafe==2.1.1 @@ -276,6 +279,7 @@ numpy==1.21.6 # sparse # tifffile # xarray + # xrayutilities # zarr numpy-quaternion==2022.4.3 # via orix @@ -432,6 +436,7 @@ scipy==1.7.3 # scikit-image # scikit-learn # sparse + # xrayutilities silx==1.1.2 # via pyfai six==1.16.0 @@ -536,6 +541,8 @@ wrapt==1.14.1 # via astroid xarray==0.20.2 # via pynxtools (pyproject.toml) +xrayutilities==1.7.6 + # via pynxtools (pyproject.toml) zarr==2.12.0 # via hyperspy zipfile37==0.1.3 diff --git a/pynxtools/dataconverter/convert.py b/pynxtools/dataconverter/convert.py index 3db435c78..c33639e84 100644 --- a/pynxtools/dataconverter/convert.py +++ b/pynxtools/dataconverter/convert.py @@ -22,7 +22,7 @@ import logging import os import sys -from typing import List, Tuple +from typing import List, Tuple, Optional import xml.etree.ElementTree as ET import click @@ -64,16 +64,26 @@ def get_names_of_all_readers() -> List[str]: return all_readers -# pylint: disable=too-many-arguments,too-many-locals -def convert(input_file: Tuple[str, ...], - reader: str, - nxdl: str, - output: str, - generate_template: bool = False, - fair: bool = False, - undocumented: bool = False, - **kwargs): - """The conversion routine that takes the input parameters and calls the necessary functions.""" +def get_nxdl_root_and_path(nxdl: str): + """Get xml root element and file path from nxdl name e.g. NXapm. + + Parameters + ---------- + nxdl: str + Name of nxdl file e.g. NXapm from NXapm.nxdl.xml. + + Returns + ------- + ET.root + Root element of nxdl file. + str + Path of nxdl file. 
+ + Raises + ------ + FileNotFoundError + Error if no file with the given nxdl name is found. + """ # Reading in the NXDL and generating a template definitions_path = nexus.get_nexus_definitions_path() if nxdl == "NXtest": @@ -86,30 +96,56 @@ def convert(input_file: Tuple[str, ...], nxdl_path = os.path.join(definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml") if not os.path.exists(nxdl_path): nxdl_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml") + if not os.path.exists(nxdl_path): + nxdl_path = os.path.join(definitions_path, "base_classes", f"{nxdl}.nxdl.xml") if not os.path.exists(nxdl_path): raise FileNotFoundError(f"The nxdl file, {nxdl}, was not found.") - nxdl_root = ET.parse(nxdl_path).getroot() + return ET.parse(nxdl_path).getroot(), nxdl_path - if undocumented: - logger.setLevel(UNDOCUMENTED) + +def transfer_data_into_template(input_file, + reader, nxdl_name, + nxdl_root: Optional[ET.Element] = None, + **kwargs): + """Transfer parse and merged data from input experimental file, config file and eln. + + Experimental and eln files will be parsed and finally will be merged into template. + Before returning the template validate the template data. + + Parameters + ---------- + input_file : Union[tuple[str], str] + Tuple of files or file + reader: str + Name of reader such as xps + nxdl_name : str + Root name of nxdl file, e.g. NXmpes from NXmpes.nxdl.xml + nxdl_root : ET.element + Root element of nxdl file, otherwise provide nxdl_name + + Returns + ------- + Template + Template filled with data from raw file and eln file. 
+ + """ + if nxdl_root is None: + nxdl_root, _ = get_nxdl_root_and_path(nxdl=nxdl_name) template = Template() helpers.generate_template_from_nxdl(nxdl_root, template) - if generate_template: - logger.info(template) - return - # Setting up all the input data if isinstance(input_file, str): input_file = (input_file,) + bulletpoint = "\n\u2022 " logger.info("Using %s reader to convert the given files: %s ", reader, bulletpoint.join((" ", *input_file))) data_reader = get_reader(reader) - if not (nxdl in data_reader.supported_nxdls or "*" in data_reader.supported_nxdls): + if not (nxdl_name in data_reader.supported_nxdls or "*" in data_reader.supported_nxdls): raise NotImplementedError("The chosen NXDL isn't supported by the selected reader.") data = data_reader().read( # type: ignore[operator] @@ -118,6 +154,56 @@ def convert(input_file: Tuple[str, ...], **kwargs, ) helpers.validate_data_dict(template, data, nxdl_root) + return data + + +# pylint: disable=too-many-arguments,too-many-locals +def convert(input_file: Tuple[str], + reader: str, + nxdl: str, + output: str, + generate_template: bool = False, + fair: bool = False, + undocumented: bool = False, + **kwargs): + """The conversion routine that takes the input parameters and calls the necessary functions. + + Parameters + ---------- + input_file : Tuple[str] + Tuple of files or file + reader: str + Name of reader such as xps + nxdl : str + Root name of nxdl file, e.g. NXmpes for NXmpes.nxdl.xml + output : str + Output file name. + generate_template : bool, default False + True if user wants template in logger info. + fair : bool, default False + If True, a warning is given that there are undocumented paths + in the template. + undocumented : bool, default False + If True, an undocumented warning is given. + + Returns + ------- + None. 
+ """ + + nxdl_root, nxdl_path = get_nxdl_root_and_path(nxdl) + + if generate_template: + template = Template() + helpers.generate_template_from_nxdl(nxdl_root, template) + logger.info(template) + return + + data = transfer_data_into_template(input_file=input_file, reader=reader, + nxdl_name=nxdl, nxdl_root=nxdl_root, + **kwargs) + if undocumented: + logger.setLevel(UNDOCUMENTED) if fair and data.undocumented.keys(): logger.warning("There are undocumented paths in the template. This is not acceptable!") return @@ -136,6 +222,28 @@ def convert(input_file: Tuple[str, ...], logger.info("The output file generated: %s", output) +def convert_and_return_template(input_file: Tuple[str], + reader: str, + nxdl: str, + output: str, + generate_template: bool = False, + fair: bool = False, + undocumented: bool = False, + **kwargs): + + """Convert input files into structure data according template and return template. + + This function only is special than convert function by return value which is filled data + template. 
+ """ + temp_data_dict = {'data': None} + convert(input_file=input_file, reader=reader, + nxdl=nxdl, output=output, generate_template=generate_template, + fair=fair, undocumented=undocumented, temp_data_dict=temp_data_dict, **kwargs) + + return temp_data_dict['data'] + + def parse_params_file(params_file): """Parses the parameters from a given dictionary and returns them""" params = yaml.load(params_file, Loader=yaml.Loader)['dataconverter'] diff --git a/pynxtools/dataconverter/helpers.py b/pynxtools/dataconverter/helpers.py index f9da1b300..c1e8c2280 100644 --- a/pynxtools/dataconverter/helpers.py +++ b/pynxtools/dataconverter/helpers.py @@ -17,12 +17,13 @@ # """Helper functions commonly used by the convert routine.""" -from typing import List +from typing import List, Optional, Any from typing import Tuple, Callable, Union import re import xml.etree.ElementTree as ET from datetime import datetime, timezone import logging +import json import numpy as np from ase.data import chemical_symbols @@ -650,3 +651,72 @@ def extract_atom_types(formula, mode='hill'): return convert_to_hill(atom_types) return atom_types + + +def transform_to_intended_dt(str_value: Any) -> Optional[Any]: + """Transform string to the intended data type, if not then return str_value. + + E.g '2.5E-2' will be transfor into 2.5E-2 + tested with: '2.4E-23', '28', '45.98', 'test', ['59', '3.00005', '498E-34'], + '23 34 444 5000', None + with result: 2.4e-23, 28, 45.98, test, [5.90000e+01 3.00005e+00 4.98000e-32], + np.array([23 34 444 5000]), None + NOTE: add another arg in this func for giving 'hint' what kind of data like + numpy array or list + Parameters + ---------- + str_value : str + Data from other format that comes as string e.g. string of list. 
+ + Returns + ------- + Union[str, int, float, np.ndarray] + Converted data type + """ + + symbol_list_for_data_seperation = [';', ' '] + transformed: Any = None + + if isinstance(str_value, list): + try: + transformed = np.array(str_value, dtype=np.float64) + return transformed + except ValueError: + pass + + if isinstance(str_value, np.ndarray): + return str_value + if isinstance(str_value, str): + try: + transformed = int(str_value) + except ValueError: + try: + transformed = float(str_value) + except ValueError: + if '[' in str_value and ']' in str_value: + transformed = json.loads(str_value) + if transformed: + return transformed + for sym in symbol_list_for_data_seperation: + if sym in str_value: + parts = str_value.split(sym) + modified_parts = [] + for part in parts: + modified_parts.append(transform_to_intended_dt(part)) + return transform_to_intended_dt(modified_parts) + + return str_value + + +def nested_dict_to_slash_separated_path(nested_dict: dict, + flattened_dict: dict, + parent_path=''): + """Convert nested dict into slash separeted path upto certain level.""" + sep = '/' + + for key, val in nested_dict.items(): + path = parent_path + sep + key + if isinstance(val, dict): + nested_dict_to_slash_separated_path(val, flattened_dict, path) + else: + flattened_dict[path] = val diff --git a/pynxtools/dataconverter/readers/sts/helper.py b/pynxtools/dataconverter/readers/sts/helper.py index 63c546798..6ad99ca8a 100644 --- a/pynxtools/dataconverter/readers/sts/helper.py +++ b/pynxtools/dataconverter/readers/sts/helper.py @@ -19,9 +19,10 @@ from typing import Tuple import copy -import json import numpy as np -from pynxtools.dataconverter.helpers import convert_data_dict_path_to_hdf5_path +from pynxtools.dataconverter.helpers import (convert_data_dict_path_to_hdf5_path, + transform_to_intended_dt, + ) # Here are some data or data type or unit or data to skip: @@ -41,12 +42,12 @@ def fill_template_from_eln_data(eln_data_dict, template): """ for e_key, e_val in 
eln_data_dict.items(): - template[e_key] = to_intended_t(e_val) + template[e_key] = transform_to_intended_dt(e_val) -def work_out_overwriteable_field(template, data_dict, - sub_config_dict, nexus_path, - dict_orig_key_to_mod_key): +def overwrite_fields(template, data_dict, + sub_config_dict, nexus_path, + dict_orig_key_to_mod_key): """ Overwrite a field for multiple dimention of the same type of physical quantity. @@ -93,13 +94,14 @@ def work_out_overwriteable_field(template, data_dict, dict_orig_key_to_mod_key[nexus_path] = new_temp_key if value in data_path: path_to_data = data_path[value] - template[new_temp_key] = to_intended_t(data_dict[path_to_data] - if path_to_data in data_dict else None) + template[new_temp_key] = transform_to_intended_dt(data_dict[path_to_data] + if path_to_data in data_dict + else None) if unit in data_path: path_to_data = data_path[unit] - template[new_temp_key + "/@units"] = to_intended_t(data_dict[path_to_data] - if path_to_data in data_dict - else None) + unit = transform_to_intended_dt(data_dict[path_to_data] if path_to_data in data_dict + else None) + template[new_temp_key + "/@units"] = unit def nested_path_to_slash_separated_path(nested_dict: dict, @@ -264,60 +266,3 @@ def slice_before_last_element(np_array): if not isinstance(np_array, np.ndarray) and not len(np.shape(np_array)) == 1: raise ValueError('Please provide a numpy array of 1D.') return np_array[:-1] - - -# pylint: disable=too-many-return-statements -def to_intended_t(str_value): - """ - Transform string to the intended data type, if not then return str_value. 
- e.g '2.5E-2' will be transfor into 2.5E-2 - tested with: '2.4E-23', '28', '45.98', 'test', ['59', '3.00005', '498E-34'], None - with result: 2.4e-23, 28, 45.98, test, [5.90000e+01 3.00005e+00 4.98000e-32], None - - Parameters - ---------- - str_value : _type_ - _description_ - - Returns - ------- - Union[str, int, float, np.ndarray] - Converted data type - """ - symbol_list_for_data_seperation = [';'] - transformed = "" - if str_value is None: - return str_value - - if isinstance(str_value, list): - str_value = list(str_value) - try: - transformed = np.array(str_value, dtype=np.float64) - return transformed - except ValueError: - pass - - if isinstance(str_value, np.ndarray): - return str_value - if isinstance(str_value, str): - try: - transformed = int(str_value) - return transformed - except ValueError: - try: - transformed = float(str_value) - return transformed - except ValueError: - if '[' in str_value and ']' in str_value: - transformed = json.loads(str_value) - return transformed - - for sym in symbol_list_for_data_seperation: - if sym in str_value: - parts = str_value.split(sym) - modified_parts = [] - for part in parts: - modified_parts.append(to_intended_t(part)) - return modified_parts - - return str_value diff --git a/pynxtools/dataconverter/readers/sts/reader.py b/pynxtools/dataconverter/readers/sts/reader.py index 9929c1759..e6698b834 100644 --- a/pynxtools/dataconverter/readers/sts/reader.py +++ b/pynxtools/dataconverter/readers/sts/reader.py @@ -195,32 +195,31 @@ def read(self, fl_obj: object if ext in ['sxm', 'dat']: data_file = file - if ext == 'json': - with open(file, mode="r", encoding="utf-8") as fl_obj: - config_dict = json.load(fl_obj) if ext in ['yaml', 'yml']: - with open(file, mode="r", encoding="utf-8") as fl_obj: + with open(file, encoding="utf-8", mode="r") as fl_obj: eln_dict = flatten_and_replace( FlattenSettings( yaml.safe_load(fl_obj), - CONVERT_DICT, - REPLACE_NESTED + replace_nested=REPLACE_NESTED, + convert_dict=CONVERT_DICT, 
) ) + if ext == 'json': + with open(file, mode="r", encoding="utf-8") as fl_obj: + config_dict = json.load(fl_obj) # Get callable object that has parser inside parser = Spm().get_appropriate_parser(eln_dict) parser(template, data_file, config_dict, eln_dict) for key, val in template.items(): - - if val is None: - del template[key] - else: + if val is not None: filled_template[key] = val + else: + del template[key] + if not filled_template.keys(): - raise ValueError("Reader could not read anything! Check for input files and the" - " corresponding extention.") + raise ValueError("Reader could not read anything! Check for input files") return filled_template diff --git a/pynxtools/dataconverter/readers/sts/stm_file_parser.py b/pynxtools/dataconverter/readers/sts/stm_file_parser.py index 6cfae8b3a..29321f278 100644 --- a/pynxtools/dataconverter/readers/sts/stm_file_parser.py +++ b/pynxtools/dataconverter/readers/sts/stm_file_parser.py @@ -28,10 +28,11 @@ import nanonispy as nap from pynxtools.dataconverter.readers.sts.helper import (nested_path_to_slash_separated_path, - to_intended_t, fill_template_from_eln_data, - work_out_overwriteable_field, + fill_template_from_eln_data, + overwrite_fields, link_seperation_from_hard_code, UNIT_TO_SKIP) +from pynxtools.dataconverter.helpers import transform_to_intended_dt logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s') @@ -224,7 +225,7 @@ def indivisual_DATA_field(): signals.append(field_name) nxdata_grp = data_group.replace("DATA[data", f"DATA[{grp_name}") temp_data_field = nxdata_grp + '/' + field_name - scan_dt_arr = to_intended_t(data_dict[path]) + scan_dt_arr = transform_to_intended_dt(data_dict[path]) x_cor_len, y_cor_len = scan_dt_arr.shape # collect for only one data field e.g. 
forward or backward, as all the data # fields must have the same length of co-ordinate @@ -309,11 +310,11 @@ def get_dimension_info(self, config_dict, data_dict): # parts are X_cor, Y_cor, X_len, Y_len and one unkown value scanfield_parts = scanfield.split(sep) - x_start = to_intended_t(scanfield_parts[0]) - x_len = to_intended_t(scanfield_parts[2]) + x_start = transform_to_intended_dt(scanfield_parts[0]) + x_len = transform_to_intended_dt(scanfield_parts[2]) x_cor = [x_start, x_start + x_len, unit_info] - y_start = to_intended_t(scanfield_parts[1]) - y_len = to_intended_t(scanfield_parts[3]) + y_start = transform_to_intended_dt(scanfield_parts[1]) + y_len = transform_to_intended_dt(scanfield_parts[3]) y_cor = [y_start, y_start + y_len, unit_info] return (x_cor, y_cor) return () @@ -337,12 +338,12 @@ def from_sxm_file_into_template(self, template, config_dict, eln_data_dict): if c_key in temp_keys: if isinstance(c_val, str): if c_val in data_dict: - template[c_key] = to_intended_t(data_dict[c_val]) + template[c_key] = transform_to_intended_dt(data_dict[c_val]) # Handling multiple possible raw data according to user's defined name. 
if isinstance(c_val, list): for search_key in c_val: if search_key in data_dict: - template[c_key] = to_intended_t(data_dict[search_key]) + template[c_key] = transform_to_intended_dt(data_dict[search_key]) if isinstance(c_val, dict): data_group = "/ENTRY[entry]/DATA[data]" if c_key == data_group: @@ -353,21 +354,21 @@ def from_sxm_file_into_template(self, template, config_dict, eln_data_dict): coor_info, data_group) else: - work_out_overwriteable_field(template, - data_dict, - c_val, - c_key, - nxdl_key_to_modified_key) + overwrite_fields(template, + data_dict, + c_val, + c_key, + nxdl_key_to_modified_key) else: if isinstance(c_val, dict): - work_out_overwriteable_field(template, - data_dict, - c_val, - c_key, - nxdl_key_to_modified_key) + overwrite_fields(template, + data_dict, + c_val, + c_key, + nxdl_key_to_modified_key) else: - template[c_key] = to_intended_t(data_dict[c_val]) if c_val in data_dict \ - else None + template[c_key] = transform_to_intended_dt(data_dict[c_val]) \ + if c_val in data_dict else None # The following function can be used later it link come true in application def. 
# link_implementation(template, nxdl_key_to_modified_key) link_seperation_from_hard_code(template, nxdl_key_to_modified_key) diff --git a/pynxtools/dataconverter/readers/sts/sts_file_parser.py b/pynxtools/dataconverter/readers/sts/sts_file_parser.py index 5167c62b7..33b0cce48 100644 --- a/pynxtools/dataconverter/readers/sts/sts_file_parser.py +++ b/pynxtools/dataconverter/readers/sts/sts_file_parser.py @@ -29,9 +29,10 @@ import numpy as np from pynxtools.dataconverter.readers.sts.helper import (fill_template_from_eln_data, nested_path_to_slash_separated_path, - work_out_overwriteable_field, + overwrite_fields, link_seperation_from_hard_code, - to_intended_t, UNIT_TO_SKIP) + UNIT_TO_SKIP) +from pynxtools.dataconverter.helpers import transform_to_intended_dt logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s') @@ -458,7 +459,7 @@ def from_dat_file_into_template(template, dat_file, config_dict, eln_data_dict): if c_val in ["", None, 'None', 'none']: continue if isinstance(c_val, str) and c_val in flattened_dict: - template[c_key] = to_intended_t(flattened_dict[c_val]) + template[c_key] = transform_to_intended_dt(flattened_dict[c_val]) if isinstance(c_val, dict) and c_val: data_group_concept = "/ENTRY[entry]/DATA[data]" if data_group_concept == c_key: @@ -469,8 +470,8 @@ def from_dat_file_into_template(template, dat_file, config_dict, eln_data_dict): else: # pass other physical quantity that has muliple dimensions or type for # same physical quantity e.g. 
in drift_N N will be replaced X, Y and Z - work_out_overwriteable_field(template, flattened_dict, c_val, c_key, - dict_orig_key_to_mod_key) + overwrite_fields(template, flattened_dict, c_val, c_key, + dict_orig_key_to_mod_key) # The following function can be used if links in application come true # link_seperation(template, dict_orig_key_to_mod_key) link_seperation_from_hard_code(template, dict_orig_key_to_mod_key) diff --git a/pynxtools/dataconverter/readers/xrd/__init__.py b/pynxtools/dataconverter/readers/xrd/__init__.py index c9157de8d..d4ec4a8cc 100644 --- a/pynxtools/dataconverter/readers/xrd/__init__.py +++ b/pynxtools/dataconverter/readers/xrd/__init__.py @@ -12,4 +12,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/pynxtools/dataconverter/readers/xrd/config.py b/pynxtools/dataconverter/readers/xrd/config.py new file mode 100644 index 000000000..cdb11005a --- /dev/null +++ b/pynxtools/dataconverter/readers/xrd/config.py @@ -0,0 +1,99 @@ +"""This is config file that mainly maps nexus definition to data path in raw file.""" + +# pylint: disable=C0301 +xrdml = { + "/ENTRY[entry]/2theta_plot/chi": {"xrdml_1.5": {"value": "", + "@units": "", + "@chi_indices": 0}, + }, + "/ENTRY[entry]/2theta_plot/intensity": {"xrdml_1.5": {"value": "/detector", + "@units": ""} + }, + "/ENTRY[entry]/2theta_plot/omega": {"xrdml_1.5": {"value": "/Omega", + "@units": "", + "@omega_indices": 1}, + }, + "/ENTRY[entry]/2theta_plot/title": "Intensity Vs. 
Two Theta (deg.)", + "/ENTRY[entry]/2theta_plot/phi": {"xrdml_1.5": {"value": "/Omega", + "@units": "", + "@phi_indices": 0}, + }, + "/ENTRY[entry]/2theta_plot/two_theta": {"xrdml_1.5": {"value": "/2Theta", + "@units": "deg", + "@two_theta_indices": 0}, + }, + "/ENTRY[entry]/COLLECTION[collection]/beam_attenuation_factors": {"xrdml_1.5": {"value": "/beamAttenuationFactors", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/count_time": {"xrdml_1.5": {"value": "/countTime", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/data_file": {"xrdml_1.5": {"value": ""} + }, + "/ENTRY[entry]/COLLECTION[collection]/goniometer_x": {"xrdml_1.5": {"value": "/X", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/goniometer_y": {"xrdml_1.5": {"value": "/Y", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/goniometer_z": {"xrdml_1.5": {"value": "/Z", + "@units": ""}, + }, + "/ENTRY[entry]/COLLECTION[collection]/measurement_type": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/measurementType", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/integration_time": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/integration_time/@units": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/scanAxis", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_mode": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/scan/mode", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha1", + "@units": "/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha1/unit"}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two": {"xrdml_1.5": {"value": 
"/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha2", + "@units": "/xrdMeasurements/xrdMeasurement/usedWavelength/kAlpha2/unit"}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone": {"xrdml_1.5": {"value": "", + "@units": ""} + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/current", + "@units": ""} + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/source_peak_wavelength": {"xrdml_1.5": {"value": "", + "@units": ""} + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/anodeMaterial", + "@units": ""}, + }, + "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage": {"xrdml_1.5": {"value": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/tension", + "@units": "/xrdMeasurements/xrdMeasurement/incidentBeamPath/xRayTube/tension/unit"} + }, + "/ENTRY[entry]/SAMPLE[sample]/prepared_by": {"xrdml_1.5": {"value": ""} + }, + "/ENTRY[entry]/SAMPLE[sample]/sample_id": {"xrdml_1.5": {"value": ""}, + }, + "/ENTRY[entry]/SAMPLE[sample]/sample_mode": {"xrdml_1.5": {"value": ""}, + }, + "/ENTRY[entry]/SAMPLE[sample]/sample_name": {"xrdml_1.5": {"value": ""}, + }, + "/ENTRY[entry]/definition": "NXxrd_pan", + "/ENTRY[entry]/method": "X-Ray Diffraction (XRD)", + "/ENTRY[entry]/q_plot/intensity": {"xrdml_1.5": {"value": "/detector", + "@units": ""}, + }, + "/ENTRY[entry]/q_plot/q": {"xrdml_1.5": {"value": "", + "@units": ""}, + }, + "/@default": "entry", + "/ENTRY[entry]/@default": "2theta_plot", +} diff --git a/pynxtools/dataconverter/readers/xrd/reader.py b/pynxtools/dataconverter/readers/xrd/reader.py index 4b27911aa..0aca2b9ca 100644 --- 
a/pynxtools/dataconverter/readers/xrd/reader.py +++ b/pynxtools/dataconverter/readers/xrd/reader.py @@ -1,3 +1,4 @@ +"""XRD reader.""" # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. @@ -17,72 +18,138 @@ from typing import Tuple, Any, Dict, Union import json -from pynxtools.dataconverter.readers.xrd.xrd_parser import parse_and_convert_file -from pynxtools.dataconverter.readers.utils import flatten_and_replace, FlattenSettings +from pathlib import Path +import xml.etree.ElementTree as ET + import yaml + +from pynxtools.dataconverter.helpers import (generate_template_from_nxdl, + validate_data_dict) from pynxtools.dataconverter.template import Template +from pynxtools.dataconverter.readers.xrd.xrd_parser import parse_and_fill_template +from pynxtools.dataconverter.readers.utils import flatten_and_replace, FlattenSettings from pynxtools.dataconverter.readers.base.reader import BaseReader +CONVERT_DICT: Dict[str, str] = {} -CONVERT_DICT = { - 'Instrument': 'INSTRUMENT[instrument]', - 'Software': 'SOFTWARE[software]', - 'Hardware': 'Hardware[hardware]', - 'Analyser': 'ELECTRONANALYSER[electronanalyser]', - 'Beam': 'BEAM[beam]', - 'unit': '@units', - 'version': '@version', - 'Sample': 'SAMPLE[sample]', - 'User': 'USER[user]', - 'Data': 'DATA[data]', - 'Source': 'SOURCE[source]', - 'Environment': 'ENVIRONMENT[environment]', - 'Sample_bias': 'SAMPLE_BIAS[sample_bias]' -} +# Global var to collect the root from get_template_from_nxdl_name() +# and use it in the the the varidate_data_dict() +ROOT: ET.Element = None REPLACE_NESTED: Dict[str, str] = {} +XRD_FILE_EXTENSIONS = [".xrdml", "xrdml", ".udf", ".raw", ".xye"] + + +def get_template_from_nxdl_name(nxdl_name): + """Generate template from nxdl name. + + Example of nxdl name could be NXxrd_pan. + Parameters + ---------- + nxdl_name : str + Name of nxdl file e.g. NXmpes + + Returns + ------- + Template + Empty template. 
+ + Raises + ------ + ValueError + Error if nxdl file is not found. + """ + nxdl_file = nxdl_name + ".nxdl.xml" + current_path = Path(__file__) + def_path = current_path.parent.parent.parent.parent / 'definitions' + # Check contributed defintions + full_nxdl_path = Path(def_path, 'contributed_definitions', nxdl_file) + root = None + if full_nxdl_path.exists(): + root = ET.parse(full_nxdl_path).getroot() + else: + # Check application definition + full_nxdl_path = Path(def_path, 'applications', nxdl_file) + + if root is None and full_nxdl_path.exists(): + root = ET.parse(full_nxdl_path).getroot() + else: + full_nxdl_path = Path(def_path, 'base_classes', nxdl_file) + if root is None and full_nxdl_path.exists(): + root = ET.parse(full_nxdl_path).getroot() + elif root is None: + raise ValueError("Need correct NXDL name") -class STMReader(BaseReader): - """ Reader for XPS. + template = Template() + generate_template_from_nxdl(root=root, template=template) + return template + + +def get_template_from_xrd_reader(nxdl_name, file_paths): + """Get filled template from reader. + + Parameters + ---------- + nxdl_name : str + Name of nxdl definition + file_paths : Tuple[str] + Tuple of path of files. + + Returns + ------- + Template + Template which is a map from NeXus concept path to value. """ - supported_nxdls = ["NXroot"] + template = get_template_from_nxdl_name(nxdl_name) + + data = XRDReader().read(template=template, + file_paths=file_paths) + validate_data_dict(template=template, data=data, nxdl_root=ROOT) + return data + + +# pylint: disable=too-few-public-methods +class XRDReader(BaseReader): + """Reader for XRD.""" + + supported_nxdls = ["NXxrd_pan"] def read(self, template: dict = None, file_paths: Tuple[str] = None, objects: Tuple[Any] = None): - """ - General read menthod to prepare the template. 
- """ - # has_sxm_file: bool = False - # sxm_file: str = "" - # has_dat_file: bool = False - # dat_file: str = "" + """General read menthod to prepare the template.""" + + if not isinstance(file_paths, tuple) and not isinstance(file_paths, list): + file_paths = (file_paths,) filled_template: Union[Dict, None] = Template() - # config_dict: Union[Dict[str, Any], None] = None eln_dict: Union[Dict[str, Any], None] = None config_dict: Dict = {} - - data_file: str = "" + xrd_file: str = "" + xrd_file_ext: str = "" for file in file_paths: - ext = file.rsplit('.', 1)[-1] - if ext == 'json': + ext = "".join(Path(file).suffixes) + if ext == '.json': with open(file, mode="r", encoding="utf-8") as fl_obj: config_dict = json.load(fl_obj) - elif ext in ['yaml', 'yml']: + elif ext in ['.yaml', '.yml']: with open(file, mode="r", encoding="utf-8") as fl_obj: eln_dict = flatten_and_replace( FlattenSettings( yaml.safe_load(fl_obj), - CONVERT_DICT, - REPLACE_NESTED + CONVERT_DICT, REPLACE_NESTED ) ) - else: - xrd_dict = parse_and_convert_file(file) - # TODO combine nyaml, json, and xrd data here + elif ext in XRD_FILE_EXTENSIONS: + xrd_file_ext = ext + xrd_file = file + if xrd_file: + parse_and_fill_template(template, xrd_file, config_dict, eln_dict) + else: + raise ValueError(f"Allowed XRD experimental with extenstion from" + f" {XRD_FILE_EXTENSIONS} found {xrd_file_ext}") # Get rid of empty concept and cleaning up Template for key, val in template.items(): @@ -97,4 +164,4 @@ def read(self, return filled_template -READER = STMReader +READER = XRDReader diff --git a/pynxtools/dataconverter/readers/xrd/xrd_helper.py b/pynxtools/dataconverter/readers/xrd/xrd_helper.py index c9157de8d..6bd4664cb 100644 --- a/pynxtools/dataconverter/readers/xrd/xrd_helper.py +++ b/pynxtools/dataconverter/readers/xrd/xrd_helper.py @@ -1,3 +1,5 @@ +"""XRD helper stuffs.""" + # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. 
@@ -12,4 +14,236 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
-# limitations under the License. \ No newline at end of file
+# limitations under the License.
+
+import warnings
+import numpy as np
+from pynxtools.dataconverter.helpers import transform_to_intended_dt
+from pynxtools.dataconverter.template import Template
+
+
+class KeyValueNotFoundWaring(Warning):
+ """Warning raised when an expected key or value is missing."""
+
+
+def get_a_value_or_warn(return_value="",
+ warning_catagory=KeyValueNotFoundWaring,
+ message="Key-value not found.",
+ stack_level=2):
+ """Return ``return_value`` after raising a warning with the given message."""
+
+ warnings.warn(f"\033[1;31m {message}:\033[0m", warning_catagory, stack_level)
+ return return_value
+
+
+def feed_xrdml_to_template(template, xrd_dict, eln_dict, file_term, config_dict=None):
+ """Fill template with data from xrdml type file.
+
+ Parameters
+ ----------
+ template : Dict
+ Template generated from nxdl definition file.
+ xrd_dict : dict
+ Just a dict mapping slash separated key to the data. The key is equivalent to the
+ path directing the location in data file.
+ eln_dict : dict
+ That brings the data from user especially using NeXus according to NeXus concept.
+ file_term : str
+ Terminological string to describe file ext. and version (e.g. xrdml_1.5) to find proper
+ dict from config file.
+ config_dict : Dict
+ Dictionary from config file that maps NeXus concept to data from different data file
+ versions. E.g. 
+ { + "/ENTRY[entry]/2theta_plot/chi": {"file_exp": {"value": "", + "@units": ""},}, + "/ENTRY[entry]/2theta_plot/intensity": {"file_exp": {"value": "/detector", + "@units": ""},} + } + """ + + def fill_template_from_config_data(config_dict: dict, template: Template, + xrd_dict: dict, file_term: str) -> None: + """ + Parameters + ---------- + config_dict : dict + Python dict that is nested dict for different file versions. + e.g. + {"/ENTRY[entry]/2theta_plot/chi": {"file_exp": {"value": "", + "@units": ""},}, + "/ENTRY[entry]/2theta_plot/intensity": {"file_exp": {"value": "/detector", + "@units": ""},} + } + template : Template + + Return + ------ + None + """ + for nx_key, val in config_dict.items(): + if isinstance(val, dict): + raw_data_des: dict = val.get(file_term, None) + if raw_data_des is None: + raise ValueError(f"conflict file config file does not have any data map" + f" for file {file_term}") + # the field does not have any value + if not raw_data_des.get('value', None): + continue + for val_atr_key, path in raw_data_des.items(): + # data or field val + if val_atr_key == 'value': + template[nx_key] = xrd_dict.get(path, None) + # attr e.g. 
@units + elif path and val_atr_key.startswith('@'): + template[nx_key + '/' + val_atr_key] = xrd_dict.get(path, None) + if not isinstance(val, dict) and isinstance(val, str): + template[nx_key] = val + + fill_template_from_config_data(config_dict, template, + xrd_dict, file_term) + + def two_theta_plot(): + two_theta_gr = "/ENTRY[entry]/2theta_plot" + template[two_theta_gr + "/" + "@axes"] = ["two_theta"] + template[two_theta_gr + "/" + "@signal"] = "intensity" + + def q_plot(): + q_plot_gr = "/ENTRY[entry]/q_plot" + alpha_2 = template.get("/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two", + None) + alpha_1 = template.get("/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one", + None) + two_theta: np.ndarray = template.get("/ENTRY[entry]/2theta_plot/two_theta", None) + if isinstance(two_theta, np.ndarray): + theta: np.ndarray = two_theta / 2 + # pylint: disable=line-too-long + ratio_key = "/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone" + if alpha_1 and alpha_2: + ratio = alpha_2 / alpha_1 + template[ratio_key] = ratio + lamda = ratio * alpha_1 + (1 - ratio) * alpha_2 + q_vec = (4 * np.pi / lamda) * np.sin(np.deg2rad(theta)) + template[q_plot_gr + "/" + "q_vec"] = q_vec + template[q_plot_gr + "/" + "@q_vec_indicies"] = 0 + template[q_plot_gr + "/" + "@axes"] = ["q_vec"] + + def handle_special_fields(): + """Some fields need special treatment.""" + + key = "/ENTRY[entry]/COLLECTION[collection]/goniometer_x" + gonio_x = template.get(key, None) + + template[key] = gonio_x[0] if (isinstance(gonio_x, np.ndarray) + and gonio_x.shape == (1,)) else gonio_x + + key = "/ENTRY[entry]/COLLECTION[collection]/goniometer_y" + gonio_y = template.get(key, None) + + template[key] = gonio_y[0] if (isinstance(gonio_y, np.ndarray) + and gonio_y.shape == (1,)) else gonio_y + + key = "/ENTRY[entry]/COLLECTION[collection]/goniometer_z" + gonio_z = template.get(key, None) + + template[key] = gonio_z[0] if (isinstance(gonio_z, 
np.ndarray) + and gonio_z.shape == (1,)) else gonio_z + + key = "/ENTRY[entry]/COLLECTION[collection]/count_time" + count_time = template.get(key, None) + + template[key] = count_time[0] if (isinstance(count_time, np.ndarray) + and count_time.shape == (1,)) else count_time + + two_theta_plot() + q_plot() + handle_special_fields() + + fill_template_from_eln_data(eln_dict, template) + + +# pylint: disable=unused-argument +def feed_udf_to_template(template, xrd_dict, eln_dict, config_dict): + """_summary_ + + Parameters + ---------- + template : _type_ + _description_ + xrd_dict : _type_ + _description_ + eln_dict : _type_ + _description_ + config_dict : _type_ + _description_ + """ + + +def feed_raw_to_template(template, xrd_dict, eln_dict, config_dict): + """_summary_ + + Parameters + ---------- + template : _type_ + _description_ + xrd_dict : _type_ + _description_ + eln_dict : _type_ + _description_ + config_dict : _type_ + _description_ + """ + + +def feed_xye_to_template(template, xrd_dict, eln_dict, config_dict): + """_summary_ + + Parameters + ---------- + template : _type_ + _description_ + xrd_dict : _type_ + _description_ + eln_dict : _type_ + _description_ + config_dict : _type_ + _description_ + """ + + +def fill_template_from_eln_data(eln_data_dict, template): + """Fill out the template from dict that generated from eln yaml file. + Parameters: + ----------- + eln_data_dict : dict[str, Any] + Python dictionary from eln file. 
+ template : dict[str, Any] + Return: + ------- + None + """ + + if eln_data_dict is None: + return + for e_key, e_val in eln_data_dict.items(): + template[e_key] = transform_to_intended_dt(e_val) + + +def fill_nxdata_from_xrdml(template, + xrd_flattend_dict, + dt_nevigator_from_config_file, + data_group_concept + ): + """_summary_ + + Parameters + ---------- + template : _type_ + _description_ + xrd_flattend_dict : _type_ + _description_ + dt_nevigator_from_config_file : _type_ + _description_ + data_group_concept : _type_ + _description_ + """ diff --git a/pynxtools/dataconverter/readers/xrd/xrd_parser.py b/pynxtools/dataconverter/readers/xrd/xrd_parser.py index 1540f2f69..ed5dd0f75 100644 --- a/pynxtools/dataconverter/readers/xrd/xrd_parser.py +++ b/pynxtools/dataconverter/readers/xrd/xrd_parser.py @@ -1,7 +1,7 @@ """ XRD file parser collection. -TODO: Extend the module level doc. """ + # Copyright The NOMAD Authors. # # This file is part of NOMAD. See https://nomad-lab.eu for further info. @@ -18,209 +18,371 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re # for regular expressions -import os # for file path operations -import xml.etree.ElementTree as ET # for XML parsing -from xrayutilities.io.panalytical_xml import XRDMLFile # for reading XRDML files - -class FileReader: - '''A class to read files from a given file path.''' - def __init__(self, file_path): - ''' - Args: - file_path (str): The path of the file to be read. 
- ''' - self.file_path = file_path +from pathlib import Path +import warnings +import xml.etree.ElementTree as ET # for XML parsing +from xrayutilities.io.panalytical_xml import XRDMLFile # for reading XRDML files +from pynxtools.dataconverter.helpers import transform_to_intended_dt, remove_namespace_from_tag +from pynxtools.dataconverter.readers.xrd.xrd_helper import feed_xrdml_to_template + + +def fill_slash_sep_dict_from_nested_dict(parent_path: str, nested_dict: dict, + slash_sep_dict: dict): + """Convert a nested dict into slash separated dict. + + Extend slash_sep_dict by key (slash separated key) from nested dict. + + Parameters + ---------- + parent_path : str + Parent path to be appended at the starting of slash separated key. + nested_dict : dict + Dict nesting other dict. + slash_sep_dict : dict + Plain dict to be extended by key value generated from nested_dict. + """ + for key, val in nested_dict.items(): + slash_sep_path = parent_path + key + if isinstance(val, dict): + fill_slash_sep_dict_from_nested_dict(slash_sep_path, val, slash_sep_dict) + else: + slash_sep_dict[slash_sep_path] = val - def read_file(self): - '''Reads the content of a file from the given file path. - Returns: - str: The content of the file. - ''' - with open(self.file_path, 'r', encoding='utf-8') as file: - content = file.read() - return content +class IgnoreNodeTextWarning(Warning): + """Special class to warn node text skip.""" -class PanalyticalXRDMLParser: - '''A class to parse Panalytical XRDML files.''' +class XRDMLParser: + """Parser for xrdml file with the help of other XRD library e.g. panalytical_xml.""" def __init__(self, file_path): - ''' - Args: - file_path (str): The path of the XRDML file to be parsed. 
- ''' - self.file_path = file_path - - def parse_metadata(self): - '''Parses the metadata of the XRDML file.''' - - with open(self.file_path, 'r', encoding='utf-8') as file: - content = file.read() - - # Remove the XML encoding declaration if it exists - content = re.sub(r'<\?xml.*\?>', '', content) - - root = ET.fromstring(content) - - ns_version = root.tag.split("}")[0].strip("{") - ns = {'xrd': ns_version} - - xrd_measurement = root.find("xrd:xrdMeasurement", ns) - - metadata = { - "measurement_type": xrd_measurement.get("measurementType"), - "sample_mode": xrd_measurement.get("sampleMode"), - "source": { - "voltage": float(xrd_measurement.find("xrd:incidentBeamPath/xrd:xRayTube/xrd:tension", ns).text) if xrd_measurement.find("xrd:incidentBeamPath/xrd:xRayTube/xrd:tension", ns) is not None else None, - "current": float(xrd_measurement.find("xrd:incidentBeamPath/xrd:xRayTube/xrd:current", ns).text) if xrd_measurement.find("xrd:incidentBeamPath/xrd:xRayTube/xrd:current", ns) is not None else None, - "kAlpha1": float(xrd_measurement.find("xrd:usedWavelength/xrd:kAlpha1", ns).text) if xrd_measurement.find("xrd:usedWavelength/xrd:kAlpha1", ns) is not None else None, - "kAlpha2": float(xrd_measurement.find("xrd:usedWavelength/xrd:kAlpha2", ns).text) if xrd_measurement.find("xrd:usedWavelength/xrd:kAlpha2", ns) is not None else None, - "anode_material": xrd_measurement.find("xrd:incidentBeamPath/xrd:xRayTube/xrd:anodeMaterial", ns).text if xrd_measurement.find("xrd:incidentBeamPath/xrd:xRayTube/xrd:anodeMaterial", ns) is not None else None, - }, - - "scan_mode": xrd_measurement.find("xrd:scan", ns).get("mode") if xrd_measurement.find("xrd:scan", ns) is not None else None, - "scan_axis": xrd_measurement.find("xrd:scan", ns).get("scanAxis") if xrd_measurement.find("xrd:scan", ns) is not None else None, - } - print(metadata) - return metadata - - - def parse_xrdml(self): - '''Parses the XRDML file using xrayutilities. 
- - Returns: - dict: A dictionary containing the parsed XRDML data. - ''' - # Read the XRDML file using xrayutilities - xrd_data = XRDMLFile(self.file_path) - result = xrd_data.scan.ddict - print(result.keys()) - print(f"counts: {result['counts']}") - print(f"detector: {result['detector']}") + """Construct XRDMLParser obj. + + Parameters + ---------- + file_path : str + Path of the file. + """ + # In future it can be utilised later it different versions of file + # self.__version = None + self.__xrd_dict = {} + self.__file_path = file_path + self.xrdml_version: str = "" + self.xml_root = ET.parse(self.__file_path).getroot() + self.find_version() + # Important note for key-val pair separator list: preceding elements have precedence on the + # on the following elements + self.key_val_pair_sprtr = (';', ',') + # Important note for key-val separator list: preceding elements have precedence on the + # on the following elements + self.key_val_sprtr = ('=', ':') + + def find_version(self): + """To find xrdml file version.""" + schema_loc = "{http://www.w3.org/2001/XMLSchema-instance}schemaLocation" + # str: 'http://www.xrdml.com/XRDMeasurement/1.5 + version = self.xml_root.get(schema_loc).split(' ')[0] + self.xrdml_version = version.split('/')[-1] + + def get_slash_separated_xrd_dict(self): + """Return a dict with slash separated key and value from xrd file. + + The key is the slash separated string path for nested xml elements. + + Returns + ------- + dict: + Dictionary where key maps xml nested elements by slash separated str. + """ + # To navigate different functions in future according to some parameters + # such as version, and data analysis module from panalytical_xml + self.handle_with_panalytical_module() + return self.__xrd_dict + + def handle_with_panalytical_module(self): + """Handeling XRDml file by parsing xml file and Pnanalytical_xml parser + + Panalytical module extends and constructs some array data from experiment settings + comes with xml file. 
+ """ + self.parse_each_elm(parent_path='/', xml_node=self.xml_root) + + # Extract other numerical data e.g. 'hkl', 'Omega', '2Theta', CountTime etc + # using panalytical_xml module + parsed_data = XRDMLFile(self.__file_path) + nested_data_dict = parsed_data.scan.ddict + fill_slash_sep_dict_from_nested_dict('/', nested_data_dict, self.__xrd_dict) + + def process_node_text(self, parent_path, node_txt) -> None: + """Processing text of node + + Parameters + ---------- + parent_path : str + Starting str of the key when forming a string key. + node_txt : str + text from node. + + Returns + ------ + None + """ + key_val_pairs = [] + # get key-val pair + for sep in self.key_val_pair_sprtr: + if sep in node_txt: + key_val_pairs.extend(node_txt.split(sep)) + break + # Separate key-val, build full path and + # store them in dict + if key_val_pairs: + for key_val in key_val_pairs: + for k_v_sep in self.key_val_sprtr: + if k_v_sep in key_val: + key, val = key_val.split(k_v_sep) + key = key.replace(' ', '') + self.__xrd_dict['/'.join([parent_path, key])] = val + break + # Handling array data comes as node text + else: + try: + self.__xrd_dict[parent_path] = transform_to_intended_dt(node_txt) + except ValueError: + warnings.warn(f'Element text {node_txt} is ignored from parseing!', + IgnoreNodeTextWarning) + + def parse_each_elm(self, parent_path, xml_node): + """Check each xml element and send the element to intended function. + + Parameters + ---------- + parent_path : str + Path to be in the starting of the key composing from element e.g. '/'. + xml_node : XML.Element + Any element except process instruction nodes. 
+ + Returns + ------ + None + """ + tag = remove_namespace_from_tag(xml_node.tag) + # Take care of special node of 'entry' tag + if tag == 'entry': + parent_path = self.parse_entry_elm(parent_path, xml_node) + else: + parent_path = self.parse_general_elm(parent_path, xml_node) + + for child in iter(xml_node): + if child is not None: + self.parse_each_elm(parent_path, child) + + def parse_general_elm(self, parent_path, xml_node): + """Handle general element except entry element. + + Parameters + ---------- + parent_path : str + Path to be in the starting of the key composing from element e.g. '/'. + xml_node : XML.Element + Any element except process instruction and entry nodes. + + Returns + ------- + None + """ + tag = remove_namespace_from_tag(xml_node.tag) + + if parent_path == '/': + parent_path = '/' + tag + else: + # New parent path ends with element tag + parent_path = '/'.join([parent_path, tag]) + + node_attr = xml_node.attrib + if node_attr: + for key, val in node_attr.items(): + # Some attr has namespace + key = remove_namespace_from_tag(key) + key = key.replace(' ', '_') + path_extend = '/'.join([parent_path, key]) + self.__xrd_dict[path_extend] = val + + node_txt = xml_node.text + if node_txt: + self.process_node_text(parent_path, node_txt) + + return parent_path + + def parse_entry_elm(self, parent_path, xml_node): + """Handle entry element. + + Parameters + ---------- + parent_path : str + Path to be in the starting of the key composing from element e.g. '/'. + xml_node : XML.Element + Any entry node. + + Returns + ------- + str: + Parent path. 
+ """ + tag = remove_namespace_from_tag(xml_node.tag) + + if parent_path == '/': + parent_path = '/' + tag + else: + # Parent path ends with element tag + parent_path = '/'.join([parent_path, tag]) + node_attr = xml_node.attrib + if node_attr: + for key, val in node_attr.items(): + # Some attributes have namespace + key = remove_namespace_from_tag(key) + path_extend = '/'.join([parent_path, key]) + self.__xrd_dict[path_extend] = val - # Add the scanmotname, material, hkl to the dictionary - result["scanmotname"] = xrd_data.scan.scanmotname - result["material"] = xrd_data.scan.material - result["hkl"] = xrd_data.scan.hkl - # add the metadata to the dictionary - result["metadata"] = self.parse_metadata() + # In entry element text must get special care on it + node_txt = xml_node.text + if node_txt: + self.process_node_text(parent_path, node_txt) - return result + return parent_path class FormatParser: - '''A class to identify and parse different file formats.''' + """A class to identify and parse different file formats.""" def __init__(self, file_path): - ''' - Args: - file_path (str): The path of the file to be identified and parsed. - ''' + """Construct FormatParser obj. + + Parameters + ---------- + file_path : str + XRD file to be parsed. + + Returns + ------- + None + """ self.file_path = file_path + self.file_parser = XRDMLParser(self.file_path) + # termilnological name of file to read config file + self.file_term = 'xrdml_' + self.file_parser.xrdml_version - def identify_format(self): - '''Identifies the format of a given file. + def get_file_format(self): + """Identifies the format of a given file. Returns: - str: The file extension of the file. - ''' - file_extension = os.path.splitext(self.file_path)[1].lower() + -------- + str: + The file extension of the file. + """ + file_extension = ''.join(Path(self.file_path).suffixes) return file_extension - def parse_panalytical_xrdml(self): - '''Parses a Panalytical XRDML file. 
+ def parse_xrdml(self): + """Parses a Panalytical XRDML file. - Returns: - dict: A dictionary containing the parsed XRDML - data. - ''' - xrdml_parser = PanalyticalXRDMLParser(self.file_path) - return xrdml_parser.parse_xrdml() + Returns + ------- + dict + A dictionary containing the parsed XRDML data. + """ + return self.file_parser.get_slash_separated_xrd_dict() def parse_panalytical_udf(self): - '''Parse the Panalytical .udf file. + """Parse the Panalytical .udf file. - Returns: - None: Placeholder for parsing .udf files. - ''' - pass + Returns + ------- + None + Placeholder for parsing .udf files. + """ def parse_bruker_raw(self): - '''Parse the Bruker .raw file. + """Parse the Bruker .raw file. - Returns: - None: Placeholder for parsing .raw files. - ''' - pass + Returns + None + """ def parse_bruker_xye(self): - '''Parse the Bruker .xye file. - + """Parse the Bruker .xye file. + + Returns + None + """ + + # pylint: disable=import-outside-toplevel + def parse_and_populate_template(self, template, config_dict, eln_dict): + """Parse xrd file into dict and fill the template. + + Parameters + ---------- + template : Template + NeXus template generated from NeXus application definitions. + xrd_file : str + Name of the xrd file. + config_dict : dict + A dict geenerated from python + eln_dict : dict + A dict generatd from eln yaml file. Returns: - None: Placeholder for parsing .xye files. - ''' - pass + None + """ + + file_format = self.get_file_format() + if file_format == ".xrdml": + xrd_dict = self.parse() + if len(config_dict) == 0 and self.file_parser.xrdml_version == '1.5': + from pynxtools.dataconverter.readers.xrd.config import xrdml + config_dict = xrdml + feed_xrdml_to_template(template, xrd_dict, eln_dict, + file_term=self.file_term, config_dict=config_dict) def parse(self): '''Parses the file based on its format. Returns: - dict: A dictionary containing the parsed data. + dict + A dictionary containing the parsed data. 
Raises: ValueError: If the file format is unsupported. ''' - file_format = self.identify_format() - + file_format = self.get_file_format() + slash_sep_dict = {} if file_format == ".xrdml": - return self.parse_panalytical_xrdml() - elif file_format == ".udf": - return self.parse_panalytical_udf() - elif file_format == ".raw": - return self.parse_bruker_raw() - elif file_format == ".xye": - return self.parse_bruker_xye() - else: - raise ValueError(f"Unsupported file format: {file_format}") - - -class DataConverter: - '''A class to convert parsed data into a common dictionary format.''' - - def __init__(self, parsed_data): - ''' - Args: - parsed_data (dict): The parsed data to be converted. - ''' - self.parsed_data = parsed_data - - def convert(self): - '''Converts the parsed data into a common dictionary format. - - Returns: - dict: The converted data in a common dictionary format. - ''' - # In this case, the parsed_data is already in the common dictionary format - # If you need additional conversion or data processing, implement it here - return self.parsed_data - -def parse_and_convert_file(file_path): - '''The main function to parse and convert a file. - Args: - file_path (str): The path of the file to be parsed and converted. - - Returns: - dict: The parsed and converted data in a common dictionary format. 
- '''
- file_path = os.path.abspath(file_path)
-
- format_parser = FormatParser(file_path)
- parsed_data = format_parser.parse()
-
- data_converter = DataConverter(parsed_data)
- common_data = data_converter.convert()
-
- return common_data
+ slash_sep_dict = self.parse_xrdml()
+ # elif file_format == ".udf":
+ # return self.parse_panalytical_udf()
+ # elif file_format == ".raw":
+ # return self.parse_bruker_raw()
+ # elif file_format == ".xye":
+ # return self.parse_bruker_xye()
+ # else:
+ # raise ValueError(f"Unsupported file format: {file_format}")
+ return slash_sep_dict
+
+
+def parse_and_fill_template(template, xrd_file, config_dict, eln_dict):
+ """Parse xrd file and fill the template with data from that file.
+
+ Parameters
+ ----------
+ template : Template[dict]
+ Template generated from nxdl definition.
+ xrd_file : str
+ Name of the xrd file with extension.
+ config_dict : Dict
+ Dictionary from config.json or similar file.
+ eln_dict : Dict
+ Plain and '/' separated dictionary from yaml for ELN.
+ """
+
+ format_parser = FormatParser(xrd_file)
+ format_parser.parse_and_populate_template(template, config_dict, eln_dict)
diff --git a/pynxtools/dataconverter/template.py b/pynxtools/dataconverter/template.py
index 28762ea17..8cb83a7da 100644
--- a/pynxtools/dataconverter/template.py
+++ b/pynxtools/dataconverter/template.py
@@ -125,6 +125,29 @@ def __contains__(self, k):
 k in self.required
 ])
+ def get(self, key, return_value=None):
+ """Implementing get method for template. 
+ + Parameters + ---------- + key : str + Template key + return_value : Any + + return : + The value comes with return_value + """ + val = self.optional.get(key, None) + if val is None: + val = self.recommended.get(key, None) + if val is None: + val = self.required.get(key, None) + if val is None: + val = self.undocumented.get(key, None) + if val is None: + return return_value + return val + def __getitem__(self, k): """Handles how values are accessed from the Template object.""" # Try setting item in all else throw error. Does not append to default. diff --git a/pynxtools/dataconverter/writer.py b/pynxtools/dataconverter/writer.py index 6fc52337f..76d76cb75 100644 --- a/pynxtools/dataconverter/writer.py +++ b/pynxtools/dataconverter/writer.py @@ -190,12 +190,19 @@ class Writer: nxs_namespace (str): The namespace used in the NXDL tags. Helps search for XML children. """ - def __init__(self, data: dict = None, nxdl_path: str = None, output_path: str = None): + def __init__(self, data: dict = None, + nxdl_path: str = None, + output_path: str = None, write_in_memory: bool = False): """Constructs the necessary objects required by the Writer class.""" self.data = data self.nxdl_path = nxdl_path self.output_path = output_path - self.output_nexus = h5py.File(self.output_path, "w") + self.write_in_memory = write_in_memory + if self.write_in_memory: + self.output_nexus = h5py.File(self.output_path, "w", driver="core", backing_store=False) + else: + self.output_nexus = h5py.File(self.output_path, "w") + self.nxdl_data = ET.parse(self.nxdl_path).getroot() self.nxs_namespace = get_namespace(self.nxdl_data) @@ -241,8 +248,9 @@ def ensure_and_get_parent_node(self, path: str, undocumented_paths) -> h5py.Grou return grp return self.output_nexus[parent_path_hdf5] - def write(self): - """Writes the NeXus file with previously validated data from the reader with NXDL attrs.""" + def _process_data_into_hdf5(self): + """Store data in hdf5 in in-memory file or file.""" + 
hdf5_links_for_later = [] def add_units_key(dataset, path): @@ -277,6 +285,7 @@ def add_units_key(dataset, path): except Exception as exc: raise IOError(f"Unknown error occured writing the path: {path} " f"with the following message: {str(exc)}") from exc + for links in hdf5_links_for_later: dataset = handle_dicts_entries(*links) if dataset is None: @@ -306,4 +315,28 @@ def add_units_key(dataset, path): raise IOError(f"Unknown error occured writing the path: {path} " f"with the following message: {str(exc)}") from exc - self.output_nexus.close() + def write(self): + """Writes the NeXus file with previously validated data from the reader with NXDL attrs.""" + try: + if self.write_in_memory: + raise ValueError("To write in memory and get the file obhect please use " + "the method get_in_memory_obj()") + self._process_data_into_hdf5() + finally: + self.output_nexus.close() + + def get_in_memory_obj(self): + """Write the nexus file as in-memory obj. + + Write in nexus in-memory file obj and return that file object. 
+ """ + try: + if self.write_in_memory: + self._process_data_into_hdf5() + else: + raise ValueError("The write_in_memory variable is False Writer" + "class initialization.") + except Exception as excp: + self.output_nexus.close() + raise excp + return self.output_nexus diff --git a/pynxtools/definitions b/pynxtools/definitions index 1a694807a..f0d7e0624 160000 --- a/pynxtools/definitions +++ b/pynxtools/definitions @@ -1 +1 @@ -Subproject commit 1a694807aaea98cea34240ee60300692a4fb5dc9 +Subproject commit f0d7e06241585543bd49699face4ac0ff76cc662 diff --git a/pynxtools/eln_mapper/scheme_eln.py b/pynxtools/eln_mapper/scheme_eln.py index 277658c28..1152bbd08 100644 --- a/pynxtools/eln_mapper/scheme_eln.py +++ b/pynxtools/eln_mapper/scheme_eln.py @@ -276,7 +276,6 @@ def generate_scheme_eln(nexus_def: str, eln_file_name: str = None) -> None: recursive_dict: Dict[str, Any] = {} get_eln_recursive_dict(recursive_dict, nxdl_file) - # print('recursive_dict', recursive_dict) with open(out_file, mode='w', encoding='utf-8') as out_f: yaml.dump(recursive_dict, sort_keys=False, stream=out_f) diff --git a/pynxtools/nexus/nexus.py b/pynxtools/nexus/nexus.py index ae6a794eb..ef5f64cd5 100644 --- a/pynxtools/nexus/nexus.py +++ b/pynxtools/nexus/nexus.py @@ -564,8 +564,11 @@ def hdf_node_to_self_concept_path(hdf_info, logger): class HandleNexus: """documentation""" + + # pylint: disable=too-many-instance-attributes def __init__(self, logger, nexus_file, - d_inq_nd=None, c_inq_nd=None): + d_inq_nd=None, c_inq_nd=None, + is_in_memory_file=False): self.logger = logger local_dir = os.path.abspath(os.path.dirname(__file__)) @@ -573,6 +576,7 @@ def __init__(self, logger, nexus_file, os.path.join(local_dir, '../../tests/data/nexus/201805_WSe2_arpes.nxs') self.parser = None self.in_file = None + self.is_hdf5_file_obj = is_in_memory_file self.d_inq_nd = d_inq_nd self.c_inq_nd = c_inq_nd # Aggregating hdf path corresponds to concept query node @@ -639,19 +643,28 @@ def full_visit(self, root, 
hdf_node, name, func): def process_nexus_master_file(self, parser): """Process a nexus master file by processing all its nodes and their attributes""" self.parser = parser - self.in_file = h5py.File( - self.input_file_name[0] - if isinstance(self.input_file_name, list) - else self.input_file_name, 'r' - ) - self.full_visit(self.in_file, self.in_file, '', self.visit_node) - if self.d_inq_nd is None and self.c_inq_nd is None: - get_default_plotable(self.in_file, self.logger) - # To log the provided concept and concepts founded - if self.c_inq_nd is not None: - for hdf_path in self.hdf_path_list_for_c_inq_nd: - self.logger.info(hdf_path) - self.in_file.close() + try: + if not self.is_hdf5_file_obj: + self.in_file = h5py.File( + self.input_file_name[0] + if isinstance(self.input_file_name, list) + else self.input_file_name, 'r' + ) + else: + self.in_file = self.input_file_name + + self.full_visit(self.in_file, self.in_file, '', self.visit_node) + + if self.d_inq_nd is None and self.c_inq_nd is None: + get_default_plotable(self.in_file, self.logger) + # To log the provided concept and concepts founded + if self.c_inq_nd is not None: + for hdf_path in self.hdf_path_list_for_c_inq_nd: + self.logger.info(hdf_path) + finally: + # To test if hdf_file is open print(self.in_file.id.valid) + self.in_file.close() + # To test if hdf_file is open print(self.in_file.id.valid) @click.command() diff --git a/pyproject.toml b/pyproject.toml index 8b1af5f0e..2fbec6688 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ dependencies = [ "tzlocal<=4.3", "scipy>=1.7.1", "lark>=1.1.5", + "xrayutilities>=1.7.4", "requests", "requests_cache", "nanonispy@git+https://github.com/ramav87/nanonispy.git@a0da87c58482d29624a2bf5deecb763dd1274212", diff --git a/tests/nexus/test_nexus.py b/tests/nexus/test_nexus.py index aec3feb17..95d3c2b23 100644 --- a/tests/nexus/test_nexus.py +++ b/tests/nexus/test_nexus.py @@ -79,9 +79,6 @@ def test_nexus(tmp_path): # # didn't work with filecmp 
library # log = os.path.join(local_dir, '../data/nexus_test_data/nexus_test.log') # ref = os.path.join(local_dir, '../data/nexus_test_data/Ref_nexus_test.log') - # print(filecmp.cmp(log, ref, shallow=False)) - - # print('Testing of nexus.py is SUCCESSFUL.') def test_get_node_at_nxdl_path():