Skip to content

Commit

Permalink
Merge branch 'master' into refactor-xps-reader
Browse files Browse the repository at this point in the history
  • Loading branch information
lukaspie committed Dec 4, 2023
2 parents afe46e0 + 8620c4e commit fabe938
Show file tree
Hide file tree
Showing 19 changed files with 2,051 additions and 162 deletions.
9 changes: 1 addition & 8 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
#
# pip-compile --extra=dev --output-file=dev-requirements.txt pyproject.toml
#
appnope==0.1.3
# via
# ipykernel
# ipython
asciitree==0.3.3
# via zarr
ase==3.22.1
Expand Down Expand Up @@ -48,16 +44,13 @@ comm==0.2.0
contourpy==1.1.1
# via matplotlib
coverage[toml]==7.3.2
# via
# coverage
# pytest-cov
# via pytest-cov
cycler==0.12.1
# via matplotlib
cython==3.0.6
# via tables
dask[array]==2023.5.0
# via
# dask
# hyperspy
# kikuchipy
# orix
Expand Down
140 changes: 113 additions & 27 deletions pynxtools/dataconverter/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import logging
import os
import sys
from typing import List, Tuple
from typing import List, Tuple, Optional
import xml.etree.ElementTree as ET

import click
Expand Down Expand Up @@ -80,60 +80,146 @@ def get_names_of_all_readers() -> List[str]:
return all_readers + plugins


# pylint: disable=too-many-arguments,too-many-locals
def convert(input_file: Tuple[str, ...],
reader: str,
nxdl: str,
output: str,
generate_template: bool = False,
fair: bool = False,
undocumented: bool = False,
**kwargs):
"""The conversion routine that takes the input parameters and calls the necessary functions."""
def get_nxdl_root_and_path(nxdl: str):
"""Get xml root element and file path from nxdl name e.g. NXapm.
Parameters
----------
nxdl: str
Name of nxdl file e.g. NXapm from NXapm.nxdl.xml.
Returns
-------
ET.root
Root element of nxdl file.
str
Path of nxdl file.
Raises
------
FileNotFoundError
Error if no file with the given nxdl name is found.
"""
# Reading in the NXDL and generating a template
definitions_path = nexus.get_nexus_definitions_path()
if nxdl == "NXtest":
nxdl_path = os.path.join(
nxdl_f_path = os.path.join(
f"{os.path.abspath(os.path.dirname(__file__))}/../../",
"tests", "data", "dataconverter", "NXtest.nxdl.xml")
elif nxdl == "NXroot":
nxdl_path = os.path.join(definitions_path, "base_classes", "NXroot.nxdl.xml")
nxdl_f_path = os.path.join(definitions_path, "base_classes", "NXroot.nxdl.xml")
else:
nxdl_path = os.path.join(definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml")
if not os.path.exists(nxdl_path):
nxdl_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml")
if not os.path.exists(nxdl_path):
nxdl_f_path = os.path.join(definitions_path, "contributed_definitions", f"{nxdl}.nxdl.xml")
if not os.path.exists(nxdl_f_path):
nxdl_f_path = os.path.join(definitions_path, "applications", f"{nxdl}.nxdl.xml")
if not os.path.exists(nxdl_f_path):
nxdl_f_path = os.path.join(definitions_path, "base_classes", f"{nxdl}.nxdl.xml")
if not os.path.exists(nxdl_f_path):
raise FileNotFoundError(f"The nxdl file, {nxdl}, was not found.")

nxdl_root = ET.parse(nxdl_path).getroot()
return ET.parse(nxdl_f_path).getroot(), nxdl_f_path

if undocumented:
logger.setLevel(UNDOCUMENTED)

def transfer_data_into_template(input_file,
reader, nxdl_name,
nxdl_root: Optional[ET.Element] = None,
**kwargs):
"""Transfer parse and merged data from input experimental file, config file and eln.
Experimental and eln files will be parsed and finally will be merged into template.
Before returning the template validate the template data.
Parameters
----------
input_file : Union[tuple[str], str]
Tuple of files or file
reader: str
Name of reader such as xps
nxdl_name : str
Root name of nxdl file, e.g. NXmpes from NXmpes.nxdl.xml
nxdl_root : ET.element
Root element of nxdl file, otherwise provide nxdl_name
Returns
-------
Template
Template filled with data from raw file and eln file.
"""
if nxdl_root is None:
nxdl_root, _ = get_nxdl_root_and_path(nxdl=nxdl_name)

template = Template()
helpers.generate_template_from_nxdl(nxdl_root, template)
if generate_template:
logger.info(template)
return

# Setting up all the input data
if isinstance(input_file, str):
input_file = (input_file,)

bulletpoint = "\n\u2022 "
logger.info("Using %s reader to convert the given files: %s ",
reader,
bulletpoint.join((" ", *input_file)))

data_reader = get_reader(reader)
if not (nxdl in data_reader.supported_nxdls or "*" in data_reader.supported_nxdls):
if not (nxdl_name in data_reader.supported_nxdls or "*" in data_reader.supported_nxdls):
raise NotImplementedError("The chosen NXDL isn't supported by the selected reader.")

data = data_reader().read( # type: ignore[operator]
template=Template(template),
file_paths=input_file,
**kwargs,
**kwargs
)
helpers.validate_data_dict(template, data, nxdl_root)
return data


# pylint: disable=too-many-arguments,too-many-locals
def convert(input_file: Tuple[str, ...],
reader: str,
nxdl: str,
output: str,
generate_template: bool = False,
fair: bool = False,
undocumented: bool = False,
**kwargs):
"""The conversion routine that takes the input parameters and calls the necessary functions.
Parameters
----------
input_file : Tuple[str]
Tuple of files or file
reader: str
Name of reader such as xps
nxdl : str
Root name of nxdl file, e.g. NXmpes for NXmpes.nxdl.xml
output : str
Output file name.
generate_template : bool, default False
True if user wants template in logger info.
fair : bool, default False
If True, a warning is given that there are undocumented paths
in the template.
undocumented : bool, default False
If True, an undocumented warning is given.
Returns
-------
None.
"""

nxdl_root, nxdl_f_path = get_nxdl_root_and_path(nxdl)

if generate_template:
template = Template()
helpers.generate_template_from_nxdl(nxdl_root, template)
logger.info(template)
return

data = transfer_data_into_template(input_file=input_file, reader=reader,
nxdl_name=nxdl, nxdl_root=nxdl_root,
**kwargs)
if undocumented:
logger.setLevel(UNDOCUMENTED)
if fair and data.undocumented.keys():
logger.warning("There are undocumented paths in the template. This is not acceptable!")
return
Expand All @@ -147,7 +233,7 @@ def convert(input_file: Tuple[str, ...],
path
)
helpers.add_default_root_attributes(data=data, filename=os.path.basename(output))
Writer(data=data, nxdl_path=nxdl_path, output_path=output).write()
Writer(data=data, nxdl_f_path=nxdl_f_path, output_path=output).write()

logger.info("The output file generated: %s", output)

Expand Down
77 changes: 76 additions & 1 deletion pynxtools/dataconverter/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
#
"""Helper functions commonly used by the convert routine."""

from typing import List
from typing import List, Optional, Any
from typing import Tuple, Callable, Union
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timezone
import logging
import json

import numpy as np
from ase.data import chemical_symbols
Expand Down Expand Up @@ -650,3 +651,77 @@ def extract_atom_types(formula, mode='hill'):
return convert_to_hill(atom_types)

return atom_types


# pylint: disable=too-many-branches
def transform_to_intended_dt(str_value: Any) -> Optional[Any]:
    """Transform a string (or list of strings) into the data type it encodes.

    The conversion is best effort: whenever no conversion applies, the input
    is returned unchanged instead of raising.

    Examples (input -> result):
        '2.4E-23'                   -> 2.4e-23
        '28'                        -> 28
        '45.98'                     -> 45.98
        'test'                      -> 'test'
        ['59', '3.00005', '498E-34'] -> np.array([5.9e+01, 3.00005e+00, 4.98e-32])
        '23 34 444 5000'            -> np.array([23., 34., 444., 5000.])
        '[1, 2.5]'                  -> [1, 2.5]
        '[a, b]'                    -> '[a, b]' (not valid JSON, returned as is)
        None                        -> None

    NOTE: a future extension could accept a 'hint' argument that tells which
    container type (numpy array or list) the caller expects.

    Parameters
    ----------
    str_value : Any
        Data from another format that usually arrives as a string, e.g. a
        number, a JSON-like list, or numbers separated by ';' or ' '.
        Lists and numpy arrays are accepted as well.

    Returns
    -------
    Union[str, int, float, list, np.ndarray, Any]
        int or float for a numeric string, the decoded JSON value for a
        bracketed string, a float64 numpy array for a list or a
        separator-delimited string of numbers, otherwise the unchanged input.
    """
    symbol_list_for_data_seperation = [';', ' ']

    if isinstance(str_value, np.ndarray):
        return str_value

    if isinstance(str_value, list):
        try:
            return np.array(str_value, dtype=np.float64)
        except (ValueError, TypeError):
            # Non-numeric or ragged content: keep the list untouched.
            return str_value

    if not isinstance(str_value, str):
        return str_value

    transformed: Any = None
    try:
        transformed = int(str_value)
    except ValueError:
        try:
            transformed = float(str_value)
        except ValueError:
            if '[' in str_value and ']' in str_value:
                try:
                    transformed = json.loads(str_value)
                except ValueError:
                    # json.JSONDecodeError subclasses ValueError; a bracketed
                    # string that is not valid JSON falls through to the
                    # separator handling below instead of crashing.
                    transformed = None
    if transformed is not None:
        return transformed

    # Only the first separator found in the string is tried.
    for sym in symbol_list_for_data_seperation:
        if sym not in str_value:
            continue
        modified_parts: List = []
        for part in str_value.split(sym):
            converted = transform_to_intended_dt(part)
            if not isinstance(converted, (int, float)):
                # One non-numeric piece means this is ordinary text.
                return str_value
            modified_parts.append(converted)
        return transform_to_intended_dt(modified_parts)

    return str_value


def nested_dict_to_slash_separated_path(nested_dict: dict,
                                        flattened_dict: dict,
                                        parent_path=''):
    """Flatten a nested dict into slash-separated paths.

    Every non-dict value found in ``nested_dict`` is stored in
    ``flattened_dict`` under the key ``parent_path + '/' + key``, where nested
    keys are joined level by level. The result dict is filled in place and
    nothing is returned.

    Parameters
    ----------
    nested_dict : dict
        Dict (possibly containing further dicts) to flatten.
    flattened_dict : dict
        Dict that receives the ``path -> value`` entries.
    parent_path : str
        Path prefix put in front of every key of ``nested_dict``.
    """
    for name, value in nested_dict.items():
        full_path = parent_path + '/' + name
        if not isinstance(value, dict):
            flattened_dict[full_path] = value
            continue
        # Descend into the sub-dict with the extended path as new prefix.
        nested_dict_to_slash_separated_path(value, flattened_dict, full_path)
40 changes: 40 additions & 0 deletions pynxtools/dataconverter/readers/xrd/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# XRD Reader
With the XRD reader, data from X-ray diffraction experiments can be read and written into a NeXus file (h5 type file with extension .nxs) according to the NXxrd_pan application definition in [NeXus](https://github.com/FAIRmat-NFDI/nexus_definitions). There are a few different methods of measuring XRD: 1. θ:2θ instruments (e.g. Rigaku H3R), and 2. θ:θ instruments (e.g. PANalytical X’Pert Pro). The goal of this reader is to support both of these methods.

**NOTE: This reader is still under development. As of now, the reader can only handle files with the extension `.xrdml`, obtained with PANalytical X’Pert Pro version 1.5 (method 2 described above). We are currently working to include more file types and file versions.**

## Contact Person in FAIRmat
In principle, you can reach out to any member of Area B of the FAIRmat consortium, but Rubel Mozumder is the most suitable contact for a quick response.

## Parsers
In computer science, a parser is a program that breaks code into smaller parts (called tokens) and arranges the relations among the tokens in a tree diagram. This process helps a compiler understand the token relationships in the source code.

The XRD reader calls a program or class (called a parser) that reads the experimental input file and re-organises the different physical/experimental concepts or properties into a certain structure, which is defined by the developer.

### class pynxtools.dataconverter.readers.xrd.xrd_parser.XRDMLParser

**inputs:**
file_path: Full path of the input file.

**Important method:**
get_slash_separated_xrd_dict() -> dict

This method can be used to check whether all the data from the input file have been read; it returns the slash-separated dict as described.


### Other Parsers
**Coming Soon!!**

### How To
The reader can be run from Jupyter-notebook or Jupyter-lab with the following command:

```sh
! dataconverter \
--reader xrd \
--nxdl NXxrd_pan \
--input-file $<xrd-file location> \
--input-file $<eln-file location> \
--output <output-file location>.nxs
```

An example file can be found in GitLab in [nomad-remote-tools-hub](https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-remote-tools-hub/-/tree/develop/docker/xrd). Feel free to visit and try out the reader.
15 changes: 15 additions & 0 deletions pynxtools/dataconverter/readers/xrd/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit fabe938

Please sign in to comment.