From 3ff39eb909e34375296bfe960fd6930ca618588f Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 14:27:09 +0200
Subject: [PATCH 1/9] initial start of docs
---
.github/workflows/build_docs.yml | 27 +++++++++++
docs/explanation/appdefs.md | 1 +
docs/explanation/contextualization.md | 8 ++++
docs/explanation/data_processing.md | 1 +
docs/explanation/implementation.md | 28 +++++++++++
docs/explanation/nomad_integration.md | 1 +
docs/how-tos/build_a_reader.md | 57 ++++++++++++++++++++++
docs/index.md | 68 ++++++++++++++++++++++++++
docs/macros.py | 17 +++++++
docs/reference/phi.md | 21 ++++++++
docs/reference/scienta.md | 16 +++++++
docs/reference/specs.md | 17 +++++++
docs/reference/vms.md | 31 ++++++++++++
docs/stylesheets/extra.css | 69 +++++++++++++++++++++++++++
docs/tutorial/installation.md | 14 ++++++
docs/tutorial/nexusio.md | 1 +
docs/tutorial/nomad.md | 10 ++++
docs/tutorial/standalone.md | 39 +++++++++++++++
mkdocs.yaml | 56 ++++++++++++++++++++++
19 files changed, 482 insertions(+)
create mode 100644 .github/workflows/build_docs.yml
create mode 100644 docs/explanation/appdefs.md
create mode 100644 docs/explanation/contextualization.md
create mode 100644 docs/explanation/data_processing.md
create mode 100644 docs/explanation/implementation.md
create mode 100644 docs/explanation/nomad_integration.md
create mode 100644 docs/how-tos/build_a_reader.md
create mode 100644 docs/index.md
create mode 100644 docs/macros.py
create mode 100644 docs/reference/phi.md
create mode 100644 docs/reference/scienta.md
create mode 100644 docs/reference/specs.md
create mode 100644 docs/reference/vms.md
create mode 100644 docs/stylesheets/extra.css
create mode 100644 docs/tutorial/installation.md
create mode 100644 docs/tutorial/nexusio.md
create mode 100644 docs/tutorial/nomad.md
create mode 100644 docs/tutorial/standalone.md
create mode 100644 mkdocs.yaml
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
new file mode 100644
index 00000000..116e8e5e
--- /dev/null
+++ b/.github/workflows/build_docs.yml
@@ -0,0 +1,27 @@
+name: build_docs
+on:
+ push:
+ branches: [main]
+permissions:
+ contents: write
+jobs:
+ deploy:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Configure Git Credentials
+ run: |
+ git config user.name github-actions[bot]
+ git config user.email 41898282+github-actions[bot]@users.noreply.github.com
+ - uses: actions/setup-python@v4
+ with:
+ python-version: 3.x
+ - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
+ - uses: actions/cache@v3
+ with:
+ key: mkdocs-material-${{ env.cache_id }}
+ path: .cache
+ restore-keys: |
+ mkdocs-material-
+ - run: pip install ".[docs]"
+ - run: mkdocs gh-deploy --force
\ No newline at end of file
diff --git a/docs/explanation/appdefs.md b/docs/explanation/appdefs.md
new file mode 100644
index 00000000..3d066bbd
--- /dev/null
+++ b/docs/explanation/appdefs.md
@@ -0,0 +1 @@
+# The NeXus application definitions: NXmpes and NXxps
\ No newline at end of file
diff --git a/docs/explanation/contextualization.md b/docs/explanation/contextualization.md
new file mode 100644
index 00000000..ef556a72
--- /dev/null
+++ b/docs/explanation/contextualization.md
@@ -0,0 +1,8 @@
+# How to map pieces of information to NeXus
+
+
\ No newline at end of file
diff --git a/docs/explanation/data_processing.md b/docs/explanation/data_processing.md
new file mode 100644
index 00000000..40c66fa3
--- /dev/null
+++ b/docs/explanation/data_processing.md
@@ -0,0 +1 @@
+# Data processing with CasaXPS
\ No newline at end of file
diff --git a/docs/explanation/implementation.md b/docs/explanation/implementation.md
new file mode 100644
index 00000000..b5a4ba51
--- /dev/null
+++ b/docs/explanation/implementation.md
@@ -0,0 +1,28 @@
+# Implementation design
+
+
diff --git a/docs/explanation/nomad_integration.md b/docs/explanation/nomad_integration.md
new file mode 100644
index 00000000..1a5f95b4
--- /dev/null
+++ b/docs/explanation/nomad_integration.md
@@ -0,0 +1 @@
+# NOMAD integration
\ No newline at end of file
diff --git a/docs/how-tos/build_a_reader.md b/docs/how-tos/build_a_reader.md
new file mode 100644
index 00000000..9d37effb
--- /dev/null
+++ b/docs/how-tos/build_a_reader.md
@@ -0,0 +1,57 @@
+# How to build your own reader
+
+Your current data is not supported yet? Don't worry, the following how-to will guide you through writing a reader for your own data.
+
+## pynxtools-xps supports your format, but some groups and fields are different
+
+Good! The basic functionality to read your data is already in place. Before you start writing your own reader, consider two options:
+1) You can modify the default [config files](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/config).
+2) Consider opening a [pull request on the GitHub repository](https://github.com/FAIRmat-NFDI/pynxtools-xps/pulls) modifying the existing reader.
+
+## You have a completely new data format
+
+You will have to write a new sub-reader inside pynxtools-xps. There are multiple steps to get started:
+
+### Development install
+
+You should start with a development install of the package with its dependencies:
+
+```shell
+git clone https://github.com/FAIRmat-NFDI/pynxtools-xps.git \
+ --branch main \
+ --recursive pynxtools_xps
+cd pynxtools_xps
+python -m pip install --upgrade pip
+python -m pip install -e .
+python -m pip install -e ".[dev,consistency_with_pynxtools]"
+```
+
+There is also a [pre-commit hook](https://pre-commit.com/#intro) available
+which formats the code and checks the linting before actually committing.
+It can be installed with
+```shell
+pre-commit install
+```
+from the root of this repository.
+
+### Design strategy
+The development process is modular so that new parsers can be added. The design logic is the following:
+1. First, [`XpsDataFileParser`](https://github.com/FAIRmat-NFDI/pynxtools-xps/blob/main/pynxtools_xps/file_parser.py#L36) selects the proper parser based on the file extensions of the provided files. It then calls a sub-parser that can read files with such extensions and calls the `parse_file` function of that reader. In addition, it selects a proper config file from
+the `config` subfolder.
+2. Afterwards, the NXmpes NXDL template is filled with the data in `XpsDataFileParser` using the [`config`](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/config) file. Data that is not in the given main files can be added through the ELN file (and must be added for required fields in NXmpes).
+
+### Write your reader
+TODO!
+
+### Test the software
+There exists a basic test framework written in [pytest](https://docs.pytest.org/en/stable/) which can be used as follows:
+```shell
+python -m pytest -sv tests
+```
+You should add test data and add your reader to the `test_params` in the `test_reader.py` script.
+
+# Further details
+
+[NXmpes](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXmpes.html)
+
+[NXxps](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXxps.html)
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 00000000..ee81787c
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,68 @@
+---
+hide: toc
+---
+
+# Documentation for pynxtools-xps:
+
+pynxtools-xps is a free and open-source data software for harmonizing X-ray photoelectron spectroscopy data and metadata for research data management using [NeXus](https://www.nexusformat.org/), implemented with the goal to make scientific research data FAIR (findable, accessible, interoperable and reusable).
+
+pynxtools-xps, which is a plugin for [pynxtools](https://github.com/FAIRmat-NFDI/pynxtools), provides a tool for reading data from various proprietary and open data formats from technology partners and the wider XPS community and standardizing it such that it is compliant with the NeXus application definitions [`NXmpes`](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXmpes.html) and [`NXxps`](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXxps.html), which is an extension of `NXmpes`. pynxtools-xps is developed both as a standalone reader and as a tool within [NOMAD](https://nomad-lab.eu/), which is the open-source data management platform for materials science we are developing with [FAIRmat](https://www.fairmat-nfdi.eu/fairmat/).
+
+pynxtools-xps solves the challenge of using heterogeneous and unfindable data formats which is common in X-ray Photoelectron Spectroscopy. In addition, it provides an interface for writing readers for different file formats to be mapped to NeXus.
+
+pynxtools-xps is useful for scientists from the XPS community who deal with heterogeneous data, for technology partners and data providers looking for ways to make their data FAIRer, and for research groups that want to organize their data using NeXus and NOMAD.
+
+
+
+
+### Tutorial
+
+A series of tutorials giving you an overview on how to store or convert your XPS data to NeXus compliant files.
+
+- [Installation guide](tutorial/installation.md)
+- [Standalone usage](tutorial/standalone.md)
+- [How to use a NeXus/HDF5 file](tutorial/nexusio.md)
+- [Usage on NOMAD](tutorial/nomad.md)
+
+
+
+
+### How-to guides
+
+How-to guides provide step-by-step instructions for a wide range of tasks, with the overarching topics:
+
+- [How to create your own reader for your XPS data](how-tos/build_a_reader.md)
+
+
+
+
+
+### Learn
+
+The explanation section provides background knowledge on the implementation design, how the data is structured, how data processing can be incorporated, how the integration works in NOMAD, and more.
+
+- [Implementation design](explanation/implementation.md)
+- [NXmpes and NXxps](explanation/appdefs.md)
+- [How to map pieces of information to NeXus](explanation/contextualization.md)
+
+
+
+
+
+
+### Reference
+
+Here you can learn which specific measurement setups and file formats from technology partners pynxtools-xps currently supports.
+
+- [Data exported by SPECS spectrometers](reference/specs.md)
+- [Data exported by Scienta Omicron spectrometers](reference/scienta.md)
+- [Data exported by Phi spectrometers](reference/phi.md)
+- [VAMAS ISO Standard format](reference/vms.md)
+
+
+
+
+Project and community
+- [Code guidelines](reference/code_guidelines.md)
+
+Thinking about using NOMAD for your next project? Get in touch!
diff --git a/docs/macros.py b/docs/macros.py
new file mode 100644
index 00000000..b9deb16b
--- /dev/null
+++ b/docs/macros.py
@@ -0,0 +1,17 @@
+"""
+MKdocs macros for the documentation
+"""
+
+
+def define_env(env):
+ """
+ This is the hook for defining variables, macros and filters
+
+ - variables: the dictionary that contains the environment variables
+ - macro: a decorator function, to declare a macro.
+ - filter: a function with one of more arguments,
+ used to perform a transformation
+ """
+
+ # add to the dictionary of variables available to markdown pages:
+ env.variables["version"] = "2023.10" # Figure out from setuptools-scm eventually
diff --git a/docs/reference/phi.md b/docs/reference/phi.md
new file mode 100644
index 00000000..d639ac1d
--- /dev/null
+++ b/docs/reference/phi.md
@@ -0,0 +1,21 @@
+# Data from Phi VersaProbe 4 instruments
+
+The reader supports [Phi MultiPak](https://www.phi.com/surface-analysis-equipment/genesis.html#software:multi-pak-data-reduction-software/) .spe (single spectra) and .pro (sputter profile / external parameter scan / ....) files, which is the proprietary format of PHI Electronics used for their VersaProbe 4 instruments. The Phi MultiPak software version that was used to measure this data is SS 3.3.3.2.1.
+
+
+
+Example data for this file format is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/phi).
+
+The example conversion can be run with the following commands:
+
+### For the .spe data (single spectrum):
+```console
+user@box:~$ dataconverter SnO2_10nm.spe eln_data_phi.yaml --reader xps --nxdl NXmpes --output SnO2_10nm.spe.nxs
+```
+### For the .pro data (profiling):
+```console
+user@box:~$ dataconverter SnO2_10nm_1.pro eln_data_phi.yaml --reader xps --nxdl NXmpes --output SnO2_10nm_1.pro.nxs
+```
+
+## Acknowledgments
+We thank Sebastian Benz and Dr. Joachim Sann from [Justus-Liebig-Universität Gießen](https://www.uni-giessen.de/de) for providing these example data sets.
diff --git a/docs/reference/scienta.md b/docs/reference/scienta.md
new file mode 100644
index 00000000..902fec90
--- /dev/null
+++ b/docs/reference/scienta.md
@@ -0,0 +1,16 @@
+# Data from Scienta Omicron instruments
+
+The reader supports reading data exported as .txt from [Scienta Omicron](https://www.scientaomicron.com/en/) instruments.
+
+
+
+Example data is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/scienta).
+
+The example conversion can be run with the following command.
+
+```console
+user@box:~$ dataconverter Cu-HHTP_*.txt eln_data.yaml --reader xps --nxdl NXmpes --output Cu-HHTP.nxs
+```
+
+## Acknowledgments
+We thank Dr. Alexei Nefedov from [KIT](https://www.ifg.kit.edu/21_1296.php) for providing the example data set.
\ No newline at end of file
diff --git a/docs/reference/specs.md b/docs/reference/specs.md
new file mode 100644
index 00000000..b0f20633
--- /dev/null
+++ b/docs/reference/specs.md
@@ -0,0 +1,17 @@
+# Data from SPECS instruments
+
+The reader supports [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) files, which is the proprietary format of SPECS GmbH. Currently, the following file extensions are supported:
+- .sle: [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) file (software version: v1.6, >v4)
+- .xml: SpecsLab 2 files, XML format from SPECS GmbH (software version: v4.63 tested, other versions also work)
+- .xy: SpecsLabProdigy export format in XY format (including all export settings)
+
+
+
+Example data for the SLE reader is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/specs).
+
+The example conversion can be run with the following command.
+```console
+user@box:~$ dataconverter --params-file params.yaml
+```
+
+Note that the `params.yaml` file contains the `remove_align` keyword which is special for the SLE parser. It allows removal of alignment spectra that were taken during the experiment. For this example, it considerably speeds up the conversion.
\ No newline at end of file
diff --git a/docs/reference/vms.md b/docs/reference/vms.md
new file mode 100644
index 00000000..79e9d698
--- /dev/null
+++ b/docs/reference/vms.md
@@ -0,0 +1,31 @@
+# VAMAS ISO standard (VMS)
+
+## Basic .vms data
+
+The reader supports VAMAS (.vms) files, the ISO standard data transfer format ([ISO 14976](https://www.iso.org/standard/24269.html)) for X-ray photoelectron spectroscopy. The data can be stored both in REGULAR (i.e., with an equally spaced energy axis) as well as IRREGULAR mode. The data was measured with and exported from [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/).
+
+Example data for the VMS reader is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/vms).
+
+The example conversion for the REGULAR file can be run with the following command:
+
+```sh
+! dataconverter \
+regular.vms \
+eln_data_vms.yaml \
+--reader xps \
+--nxdl NXmpes \
+--output vms_regular_example.nxs \
+```
+
+The example conversion for the IRREGULAR file can be run with the following command:
+
+```sh
+! dataconverter \
+irregular.vms \
+eln_data_vms.yaml \
+--reader xps \
+--nxdl NXmpes \
+--output vms_irregular_example.nxs \
+```
+
+
\ No newline at end of file
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
new file mode 100644
index 00000000..321e0870
--- /dev/null
+++ b/docs/stylesheets/extra.css
@@ -0,0 +1,69 @@
+
+.md-header__button.md-logo :where(img,svg) {
+ width: 100%;
+ height: 30px;
+}
+
+.md-header, .md-header__inner {
+ background-color: #fff;
+ color: #2A4CDF;
+}
+
+.md-header[data-md-state=shadow] {
+ box-shadow: 0px 2px 4px -1px rgb(0 0 0 / 20%), 0px 4px 5px 0px rgb(0 0 0 / 14%), 0px 1px 10px 0px rgb(0 0 0 / 12%);
+ transition: box-shadow 200ms linear;
+}
+
+.md-header__inner {
+ height: 80px;
+}
+
+.md-header__topic {
+ font-size: 24px;
+}
+
+.md-footer {
+ background-color: #2A4CDF;
+}
+
+.md-search__form:hover {
+ background-color: rgba(0,0,0,.13);
+}
+
+.md-typeset h1 {
+ color: black;
+ font-weight: 700;
+}
+
+.youtube {
+ position: relative;
+ width: 100%;
+ height: 0;
+ padding-bottom: 56.25%;
+}
+
+.youtube iframe {
+ position: absolute;
+ top: 0;
+ left: 0;
+ width: 100%;
+ height: 100%;
+}
+
+.home-grid {
+ display: grid;
+ grid-template-columns: 1fr 1fr;
+ grid-column-gap: 24px;
+ row-gap: 24px;
+}
+
+.home-grid div {
+ border-radius: 4px;
+ padding: 24px;
+ background-color: #f3e9d9;
+}
+
+.home-grid h3 {
+ margin-top: 0;
+ font-weight: 700;
+}
\ No newline at end of file
diff --git a/docs/tutorial/installation.md b/docs/tutorial/installation.md
new file mode 100644
index 00000000..1c7e4a36
--- /dev/null
+++ b/docs/tutorial/installation.md
@@ -0,0 +1,14 @@
+# Installation
+
+It is recommended to use python 3.11 with a dedicated virtual environment for this package.
+Learn how to manage [python versions](https://github.com/pyenv/pyenv) and
+[virtual environments](https://realpython.com/python-virtual-environments-a-primer/).
+
+This package is a reader plugin for [`pynxtools`](https://github.com/FAIRmat-NFDI/pynxtools) and thus should be installed together with `pynxtools`:
+
+
+```shell
+pip install pynxtools[xps]
+```
+
+for the latest development version.
diff --git a/docs/tutorial/nexusio.md b/docs/tutorial/nexusio.md
new file mode 100644
index 00000000..a25386dd
--- /dev/null
+++ b/docs/tutorial/nexusio.md
@@ -0,0 +1 @@
+# How to use a NeXus/HDF5 file
diff --git a/docs/tutorial/nomad.md b/docs/tutorial/nomad.md
new file mode 100644
index 00000000..325ca559
--- /dev/null
+++ b/docs/tutorial/nomad.md
@@ -0,0 +1,10 @@
+# Convert data to NeXus using NOMAD Oasis
+
+Maybe you not only want to use pynxtools-xps as a standalone tool, but also as part of a more comprehensive research data management system.
+[NOMAD](https://nomad-lab.eu/nomad-lab/tutorials.html) is a great choice to make it easier than ever to work with your research data. At this point you probably have an idea of what [FAIR data](https://www.nature.com/articles/sdata201618) is. Even if you don't, it doesn't matter. NOMAD provides a simple graphical interface that lets you collect and have your data ready for publication.
+
+## Steps
+
+Go to ```Publish -> Uploads```
+
+TODO: add the correct steps
\ No newline at end of file
diff --git a/docs/tutorial/standalone.md b/docs/tutorial/standalone.md
new file mode 100644
index 00000000..d0112e4d
--- /dev/null
+++ b/docs/tutorial/standalone.md
@@ -0,0 +1,39 @@
+# Convert X-ray spectroscopy data and metadata to NeXus
+
+## Who is this tutorial for?
+
+This document is for people who want to use this reader as a standalone tool to standardize their research data by converting it
+into a NeXus-standardized format.
+
+## What should you know before this tutorial?
+
+- You should have a basic understanding of [FAIRmat NeXus](https://github.com/FAIRmat/nexus_definitions) and [pynxtools](https://github.com/FAIRmat/pynxtools)
+- You should have a basic understanding of using Python and Jupyter notebooks via [JupyterLab](https://jupyter.org)
+
+## What will you know at the end of this tutorial?
+
+You will have a basic understanding how to use pynxtools-xps for converting your XPS data to a NeXus/HDF5 file.
+
+## Steps
+
+### Installation
+See here for how to install pynxtools together with the XPS reader plugin.
+
+### Running the reader from the command line
+An example script to run the XPS reader in `pynxtools`:
+```sh
+ ! dataconverter \
+--reader xps \
+--nxdl NXmpes \
+--input-file $ \
+--input-file $ \
+--output .nxs
+```
+Note that none of the supported file formats have data/values for all required and recommended fields and attributes in NXmpes. In order for the validation step of the XPS reader to pass, you need to provide an ELN file that contains the missing values. Example raw and converted data can be found in [*pynxtools_xps/examples*](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples).
+
+TODO: add more steps!
+
+**Congrats! You now have a FAIR NeXus file!**
+
+The above-mentioned parsing is also integrated into the NOMAD research data management system.
+Feel invited to try out the respective tutorial [here](nomad.md).
diff --git a/mkdocs.yaml b/mkdocs.yaml
new file mode 100644
index 00000000..46a31704
--- /dev/null
+++ b/mkdocs.yaml
@@ -0,0 +1,56 @@
+site_name: pynxtools-xps
+site_description: |
+ The documentation for the pynxtools-xps plugin
+site_author: The FAIRmat authors
+nav:
+ - Home: index.md
+ - Tutorials:
+ - tutorial/installation.md
+ - tutorial/standalone.md
+ - tutorial/nexusio.md
+ - tutorial/nomad.md
+ - How-tos:
+ - how-tos/build_a_reader.md
+ - Learn:
+ - explanation/implementation.md
+ - explanation/appdefs.md
+ - explanation/contextualization.md
+ - explanation/data_processing.md
+ - explanation/nomad_integration.md
+ - Reference:
+ - reference/specs.md
+ - reference/scienta.md
+ - reference/phi.md
+ - reference/vms.md
+theme:
+ name: material
+ features:
+ - content.code.copy
+ - content.code.annotate
+markdown_extensions:
+ - attr_list
+ - md_in_html
+ - admonition
+ - pymdownx.details
+ - pymdownx.highlight:
+ anchor_linenums: true
+ - pymdownx.inlinehilite
+ - pymdownx.snippets
+ - toc:
+ permalink: True
+ - pymdownx.arithmatex:
+ generic: true
+ - pymdownx.emoji
+ - pymdownx.extra
+ - pymdownx.superfences:
+ custom_fences:
+ - name: mermaid
+ class: mermaid
+ format: !!python/name:pymdownx.superfences.fence_code_format
+use_directory_urls: false
+plugins:
+ - search
+ - macros:
+ module_name: docs/macros
+extra_css:
+ - stylesheets/extra.css
\ No newline at end of file
From d6baaa98a4ad863c590c1553839c0eba0612ccef Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 15:08:13 +0200
Subject: [PATCH 2/9] use branch for mkdocs testing in workflow for now
---
.github/workflows/build_docs.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
index 116e8e5e..c798467c 100644
--- a/.github/workflows/build_docs.yml
+++ b/.github/workflows/build_docs.yml
@@ -1,7 +1,7 @@
name: build_docs
on:
push:
- branches: [main]
+ branches: ["*"]
permissions:
contents: write
jobs:
From 24ef9fedd642c261887ff3984f3634d66e92c69e Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 15:17:32 +0200
Subject: [PATCH 3/9] update pyproject and dev-requirements
---
dev-requirements.txt | 72 +++++++++++++++++++++++++++++++++++++++++---
pyproject.toml | 6 ++++
2 files changed, 73 insertions(+), 5 deletions(-)
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 48fe845b..2515543e 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
-# pip-compile --extra=dev --output-file=dev-requirements.txt pyproject.toml
+# pip-compile --extra=dev --extra=docs --output-file=dev-requirements.txt pyproject.toml
#
asciitree==0.3.3
# via zarr
@@ -18,6 +18,8 @@ attrs==23.1.0
# via
# cattrs
# requests-cache
+babel==2.14.0
+ # via mkdocs-material
backcall==0.2.0
# via ipython
blosc2==2.0.0
@@ -35,6 +37,7 @@ charset-normalizer==3.3.2
click==8.1.7
# via
# dask
+ # mkdocs
# pip-tools
# pynxtools
cloudpickle==3.0.0
@@ -44,6 +47,8 @@ colorama==0.4.6
# build
# click
# ipython
+ # mkdocs
+ # mkdocs-material
# pytest
# tqdm
comm==0.2.0
@@ -107,6 +112,8 @@ fsspec==2023.10.0
# hyperspy
future==0.18.3
# via uncertainties
+ghp-import==2.1.0
+ # via mkdocs
gitdb==4.0.11
# via gitpython
gitpython==3.1.40
@@ -165,7 +172,11 @@ ipython==8.12.3
jedi==0.19.1
# via ipython
jinja2==3.1.2
- # via hyperspy
+ # via
+ # hyperspy
+ # mkdocs
+ # mkdocs-macros-plugin
+ # mkdocs-material
joblib==1.3.2
# via scikit-learn
jupyter-client==8.6.0
@@ -192,8 +203,15 @@ locket==1.0.0
# via partd
lxml==4.9.3
# via fabio
+markdown==3.6
+ # via
+ # mkdocs
+ # mkdocs-material
+ # pymdown-extensions
markupsafe==2.1.3
- # via jinja2
+ # via
+ # jinja2
+ # mkdocs
matplotlib==3.7.4
# via
# ase
@@ -212,7 +230,25 @@ matplotlib-inline==0.1.6
matplotlib-scalebar==0.8.1
# via orix
mergedeep==1.3.4
- # via pynxtools
+ # via
+ # mkdocs
+ # mkdocs-get-deps
+ # pynxtools
+mkdocs==1.6.0
+ # via
+ # mkdocs-macros-plugin
+ # mkdocs-material
+ # pynxtools-xps (pyproject.toml)
+mkdocs-get-deps==0.2.0
+ # via mkdocs
+mkdocs-macros-plugin==1.0.5
+ # via pynxtools-xps (pyproject.toml)
+mkdocs-material==9.5.20
+ # via pynxtools-xps (pyproject.toml)
+mkdocs-material-extensions==1.3.1
+ # via
+ # mkdocs-material
+ # pynxtools-xps (pyproject.toml)
mpmath==1.3.0
# via sympy
msgpack==1.0.7
@@ -316,11 +352,14 @@ packaging==23.2
# hyperspy
# ipykernel
# matplotlib
+ # mkdocs
# pooch
# pytest
# scikit-image
# tables
# xarray
+paginate==0.5.6
+ # via mkdocs-material
pandas==2.0.3
# via
# ifes-apt-tc-data-modeling
@@ -330,6 +369,8 @@ parso==0.8.3
# via jedi
partd==1.4.1
# via dask
+pathspec==0.12.1
+ # via mkdocs
pickleshare==0.7.5
# via ipython
pillow==10.0.1
@@ -347,6 +388,7 @@ pip-tools==7.3.0
platformdirs==4.0.0
# via
# jupyter-core
+ # mkdocs-get-deps
# pooch
# requests-cache
# virtualenv
@@ -379,7 +421,11 @@ pycifrw==4.4.6
pyfai==2023.9.0
# via pyxem
pygments==2.17.2
- # via ipython
+ # via
+ # ipython
+ # mkdocs-material
+pymdown-extensions==10.8.1
+ # via mkdocs-material
pynxtools==0.0.10
# via pynxtools-xps (pyproject.toml)
pyparsing==3.1.1
@@ -397,10 +443,12 @@ pytest-timeout==2.2.0
# via pynxtools-xps (pyproject.toml)
python-dateutil==2.8.2
# via
+ # ghp-import
# hyperspy
# ipyparallel
# jupyter-client
# matplotlib
+ # mkdocs-macros-plugin
# pandas
pytz==2023.3.post1
# via
@@ -418,8 +466,15 @@ pyyaml==6.0.1
# dask
# hyperspy
# kikuchipy
+ # mkdocs
+ # mkdocs-get-deps
+ # mkdocs-macros-plugin
# pre-commit
+ # pymdown-extensions
# pynxtools
+ # pyyaml-env-tag
+pyyaml-env-tag==0.1
+ # via mkdocs
pyzmq==25.1.1
# via
# ipykernel
@@ -427,9 +482,12 @@ pyzmq==25.1.1
# jupyter-client
radioactivedecay==0.4.21
# via ifes-apt-tc-data-modeling
+regex==2024.4.28
+ # via mkdocs-material
requests==2.31.0
# via
# hyperspy
+ # mkdocs-material
# pooch
# pynxtools
# requests-cache
@@ -485,6 +543,8 @@ sympy==1.12
# radioactivedecay
tables==3.8.0
# via ifes-apt-tc-data-modeling
+termcolor==2.4.0
+ # via mkdocs-macros-plugin
threadpoolctl==3.2.0
# via scikit-learn
tifffile==2023.7.10
@@ -564,6 +624,8 @@ urllib3==2.1.0
# types-requests
virtualenv==20.25.0
# via pre-commit
+watchdog==4.0.0
+ # via mkdocs
wcwidth==0.2.12
# via
# prettytable
diff --git a/pyproject.toml b/pyproject.toml
index dc632156..a6eb1ed0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,12 @@ dev = [
consistency_with_pynxtools = [
"pynxtools>=0.1.1",
]
+docs = [
+ "mkdocs",
+ "mkdocs-material",
+ "mkdocs-material-extensions",
+ "mkdocs-macros-plugin",
+]
[tool.setuptools.package-data]
pynxtools_xps = ["*.json"]
From 8de0d8c04e45f73fad2141d73d181c758d849885 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 15:39:23 +0200
Subject: [PATCH 4/9] small fixes seen after first build of docs
---
docs/index.md | 9 +-
docs/reference/code_guidelines.md | 278 ++++++++++++++++++++++++++++++
docs/reference/specs.md | 5 +-
docs/reference/vms.md | 2 +-
4 files changed, 287 insertions(+), 7 deletions(-)
create mode 100644 docs/reference/code_guidelines.md
diff --git a/docs/index.md b/docs/index.md
index ee81787c..0dbf323f 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -2,7 +2,7 @@
hide: toc
---
-# Documentation for pynxtools-xps:
+# Documentation for pynxtools-xps
pynxtools-xps is a free, and open-source data software for harmonizing X-ray photolectron spectroscopy data and metadata for research data management using [NeXus](https://www.nexusformat.org/), implemented with the goal to make scientific research data FAIR (findable, accessible, interoperable and reusable).
@@ -22,7 +22,7 @@ A series of tutorials giving you an overview on how to store or convert your XPS
- [Installation guide](tutorial/installation.md)
- [Standalone usage](tutorial/standalone.md)
- [How to use a NeXus/HDF5 file](tutorial/nexusio.md)
-- [Usage on NOMAD](tutorial/nomad.md)
+- [Usage in NOMAD](tutorial/nomad.md)
@@ -63,6 +63,7 @@ Here you can learn which specific measurement setups and file formats from techn
Project and community
-- [Code guidelines](reference/code_guidelines.md)
-Thinking about using NOMAD for your next project? Get in touch!
+- [NOMAD code guidelines](reference/code_guidelines.md)
+
+Any questions or suggestions? [Get in touch!](https://www.fair-di.eu/fairmat/about-fairmat/team-fairmat)
diff --git a/docs/reference/code_guidelines.md b/docs/reference/code_guidelines.md
new file mode 100644
index 00000000..b70c5ee2
--- /dev/null
+++ b/docs/reference/code_guidelines.md
@@ -0,0 +1,278 @@
+# NOMAD code guidelines
+
+NOMAD has a long history and many people are involved in its development. These
+guidelines are set out to keep the code quality high and consistent. Please read
+them carefully.
+
+## Principles and rules
+
+- simple first, complicated only when necessary
+
+- search and adopt generic established 3rd-party solutions before implementing specific
+ solutions
+
+- only unidirectional dependencies between components/modules, no circles
+
+- only one language: Python (except GUI of course)
+
+There are some *rules* or better strong *guidelines* for writing code. The following
+applies to all Python code (and where applicable, also to Javascript and other code):
+
+- Use an IDE (e.g. [VS Code](https://code.visualstudio.com/){:target="_blank"}) or otherwise automatically
+ enforce
+ [code formatting and linting](https://code.visualstudio.com/docs/python/linting){:target="_blank"}.
+
+- Use `nomad qa` before committing. This will run all tests, static type checks, linting,
+ etc.
+
+- Test the public interface of each submodule (i.e. Python file).
+
+- There is a style guide to Python. Write
+ [PEP 8](https://www.python.org/dev/peps/pep-0008/){:target="_blank"}-compliant Python code. An exception
+ is the line cap at 79, which can be broken but keep it 90-ish.
+
+- Be [Pythonic](https://docs.python-guide.org/writing/style/){:target="_blank"} and watch
+ [this talk about best practices](https://www.youtube.com/watch?v=wf-BqAjZb8M){:target="_blank"}.
+
+- Add docstrings to the *public* interface of each submodule (i.e. Python file). This
+ includes APIs that are exposed to other submodules (i.e. other Python files).
+
+- The project structure follows
+ [this guide](https://docs.python-guide.org/writing/structure/){:target="_blank"}. Keep it!
+
+- Write tests for all contributions.
+
+- Adopt *Clean Code* practices. Here is a good
+ [introductory talk to Clean Code](https://youtu.be/7EmboKQH8lM){:target="_blank"}.
+
+## Enforcing rules with CI/CD
+
+These *guidelines* are partially enforced by CI/CD. As part of CI all tests are run on all
+branches; further we run a *linter*, *PEP 8* checker, and *mypy* (static type checker).
+You can run `nomad qa` to run all these tests and checks before committing.
+
+See [the contributing guide](../howto/develop/contrib.md) for more details on how to work with issues,
+branches, merge requests, and CI/CD.
+
+
+## Documenting code
+
+Write [Clean Code](https://youtu.be/7EmboKQH8lM){:target="_blank"} that is easy to comprehend.
+
+However, you should document the whole publicly exposed interface of a module. For Python
+this includes most classes and functions that you will write, for React its exported
+components and their props.
+
+For all functionality that is exposed to clients (APIs, CLI, schema base classes and
+annotations, UI functionality), you must consider to add explanations, tutorials, and
+examples to the documentation system (i.e. the `docs` folder). This is built with
+[mkdocs](https://www.mkdocs.org/){:target="_blank"} and published as part of each NOMAD installation.
+Also mind `nomad/mkdocs.py` and `mkdocs.yaml` and have a look at used plugins and extra
+functions, e.g. this includes generation of Markdown from `examples` or Pydantic models.
+
+To document Python functions and classes, use Google
+[docstrings](https://github.com/NilsJPWerner/autoDocstring/blob/HEAD/docs/google.md){:target="_blank"}.
+Use Markdown if you need to add markup but try to reduce this to a minimum.
+You can use VS Code plugins like
+[autoDocstring](https://github.com/NilsJPWerner/autoDocstring/tree/f7bc9f427d5ebcd87e6f5839077a87ecd1cbb404){:target="_blank"}
+to help.
+Always use single quotes, pad single-line docstrings with spaces and start multi-line ones
+on a new line.
+Here are a few examples:
+
+```python
+def generate_uuid() -> str:
+ '''Generates a base64 encoded Version 4 unique identifier. '''
+
+ return base64.encode(uuid4())
+
+def add(a: float, b: float) -> float:
+ '''
+ Adds two numbers.
+
+ Args:
+ a (float): One number.
+ b (float): The other number.
+
+ Returns:
+ float: The sum of a and b.
+ '''
+
+ return a + b
+```
+
+The only reason to comment individual lines is because there is absolutely no way
+to write it simple enough. The typical scenarios are:
+
+- workarounds to known issues with used dependencies
+
+- complex interactions between seemingly unrelated pieces of code that cannot be resolved
+ otherwise
+
+- code that has to be cumbersome due to performance optimizations
+
+**Do not** comment out code. We have Git for that.
+
+## Names and identifiers
+
+There is a certain terminology consistently used in this documentation and the source
+code. Use this terminology for identifiers.
+
+Do not use abbreviations. There are (few) exceptions: `proc` (processing), `exc` or
+`e` (exception), `calc` (calculation), `repo` (repository), `utils` (utilities), and
+`aux` (auxiliary).
+Other exceptions are `f` for file-like streams and `i` for index running variables,
+although the latter is almost never necessary in Python.
+
+Terms:
+
+- *upload*: A logical unit that comprises a collection of files uploaded by a user,
+ organized in a directory structure.
+
+- *entry*: An archive item, created by parsing a *mainfile*. Each entry belongs to an
+ upload and is associated with various metadata (an upload may have many entries).
+
+- *child entry*: Some parsers generate multiple entries -- a *main* entry plus some number
+ of *child* entries. Child entries are identified by the *mainfile* plus a *mainfile_key*
+ (string value).
+
+- *calculation*: denotes the results of either a theoretical computation created by CMS
+ code, or an experiment.
+
+- *raw file*: A user uploaded file, located somewhere in the upload's directory structure.
+
+- *mainfile*: A raw file identified as parsable, defining an entry of the upload in
+ question.
+
+- *aux file*: Additional files within an upload.
+
+- *entry metadata*: Some quantities of an entry that are searchable in NOMAD.
+
+- *archive data*: The normalized data of an entry in NOMAD's Metainfo-based format.
+
+Throughout NOMAD, we use different ids. If something is called *id*, it is usually a
+random uuid and has no semantic connection to the entity it identifies. If something is
+called a *hash* then it is a hash generated based on the entity it identifies. This means
+either the whole thing or just some properties of these entities.
+
+- The most common hash is the `entry_hash` based on `mainfile` and aux file contents.
+
+- The `upload_id` is a UUID assigned to the upload on creation. It never changes.
+
+- The `mainfile` is a path within an upload that points to a file identified as parsable.
+ This also uniquely identifies an entry within the upload.
+
+- The `entry_id` (previously called `calc_id`) uniquely identifies an entry. It is a hash
+ over the `mainfile` and respective `upload_id`. **NOTE:** For backward compatibility,
+ `calc_id` is also still supported in the API, but using it is strongly discouraged.
+
+- We often use pairs of `upload_id/entry_id`, which in many contexts allow to resolve an
+ entry-related file on the filesystem without having to ask a database about it.
+
+- The `pid` or (`coe_calc_id`) is a legacy sequential integer id, previously used to
+ identify entries. We still store the `pid` on these older entries for historical
+ purposes.
+
+- Calculation `handle` or `handle_id` are created based on those `pid`.
+ To create hashes we use :py:func:`nomad.utils.hash`.
+
+## Logging
+
+There are three important prerequisites to understand about nomad-FAIRDI's logging:
+
+- All log entries are recorded in a central Elasticsearch database. To make this database
+ useful, log entries must be sensible in size, frequency, meaning, level, and logger
+ name. Therefore, we need to follow some rules when it comes to logging.
+
+- We use a *structured* logging approach. Instead of encoding all kinds of information
+ in log messages, we use key-value pairs that provide context to a log *event*. In the
+ end, all entries are stored as JSON dictionaries with `@timestamp`, `level`,
+ `logger_name`, `event` plus custom context data. Keep events very short, most
+ information goes into the context.
+
+- We use logging to inform about the state of nomad-FAIRDI, not about user behavior,
+ input, or data. Do not confuse this when determining the log level for an event.
+ For example, a user providing an invalid upload file should never be an error.
+
+Please follow the following rules when logging:
+
+- If a logger is not already provided, only use :py:func:`nomad.utils.get_logger` to
+ acquire a new logger. Never use the built-in logging directly. These loggers work like
+ the system loggers, but allow you to pass keyword arguments with additional context
+ data. See also the [structlog docs](https://structlog.readthedocs.io/en/stable/){:target="_blank"}.
+
+- In many contexts, a logger is already provided (e.g. API, processing, parser,
+  normalizer). This provided logger already has context information bound. So it is
+ important to use those instead of acquiring your own loggers. Have a look for methods
+ called `get_logger` or attributes called `logger`.
+
+- Keep events (what usually is called *message*) very short. Examples are:
+ *file uploaded*, *extraction failed*, etc.
+
+- Structure the keys for context information. When you analyze logs in ELK, you will
+ see that the set of all keys over all log entries can be quite large. Structure your
+ keys to make navigation easier. Use keys like `nomad.proc.parser_version` instead of
+ `parser_version`. Use module names as prefixes.
+
+- Don't log everything. Try to anticipate how you would use the logs in case of bugs,
+ error scenarios, etc.
+
+- Don't log sensitive data.
+
+- Think before logging data (especially dicts, lists, NumPy arrays, etc.).
+
+- Logs should not be abused as a *printf*-style debugging tool.
+
+The following keys are used in the final logs that are piped to Logstash.
+Notice that the key name is automatically formed by a separate formatter and may differ
+from the one used in the actual log call.
+
+Keys that are autogenerated for all logs:
+
+- `@timestamp`: Timestamp for the log
+- `@version`: Version of the logger
+- `host`: Host name from which the log originated
+- `path`: Path of the module from which the log was created
+- `tags`: Tags for this log
+- `type`: *message_type* as set in the LogstashFormatter
+- `level`: Log level: `DEBUG`, `INFO`, `WARNING`, `ERROR`
+- `logger_name`: Name of the logger
+- `nomad.service`: Service name as configured in `config.py`
+- `nomad.release`: Release name as configured in `config.py`
+
+Keys that are present for events related to processing an entry:
+
+- `nomad.upload_id`: id of the currently processed upload
+- `nomad.entry_id`: id of the currently processed entry
+- `nomad.mainfile`: mainfile of the currently processed entry
+
+Keys that are present for events related to exceptions:
+
+- `exc_info`: Stores the full Python exception that was encountered. All uncaught
+ exceptions will be stored automatically here.
+
+- `digest`: If an exception was raised, the last 256 characters of the message are stored
+ automatically into this key. If you wish to search for exceptions in
+ [Kibana](https://www.elastic.co/de/kibana){:target="_blank"}, you will want to use this value as it will
+ be indexed unlike the full exception object.
+
+## Copyright notices
+
+We follow this
+[recommendation of the Linux Foundation](https://www.linuxfoundation.org/blog/2020/01/copyright-notices-in-open-source-software-projects/){:target="_blank"}
+for the copyright notice that is placed on top of each source code file.
+
+It is intended to provide a broad generic statement that allows all authors/contributors
+of the NOMAD project to claim their copyright, independent of their organization or
+individual ownership.
+
+You can simply copy the notice from another file. From time to time we can use a tool
+like [licenseheaders](https://pypi.org/project/licenseheaders/){:target="_blank"} to ensure correct
+notices. In addition we keep a purely informative AUTHORS file.
+
+## Git submodules and other "in-house" dependencies
+
+As the NOMAD ecosystem grows, you might develop libraries that are used by NOMAD instead
+of being part of its main codebase. The same guidelines should apply. You can use
+[GitHub Actions](https://github.com/features/actions){:target="_blank"} if your library is hosted on Github
+to ensure automated linting and tests.
diff --git a/docs/reference/specs.md b/docs/reference/specs.md
index b0f20633..ebf14cdf 100644
--- a/docs/reference/specs.md
+++ b/docs/reference/specs.md
@@ -1,7 +1,8 @@
-# Data from Scienta Omicron instruments
+# Data from SPECS instruments
The reader supports [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) files, which is the propietary format of SPECS GmbH. Currently, the following file extensions are supported:
-- .sle: [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) file (software version: v1.6, >v4)
+
+- .sle: [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) file (software version: v1.6, >v4)
- .xml: SpecsLab 2files, XML format from SPECS GmbH (software version: v4.63 tested, other versions also work)
- .xy: SpecsLabProdigy export format in XY format (including all export settings)
diff --git a/docs/reference/vms.md b/docs/reference/vms.md
index 79e9d698..597994dd 100644
--- a/docs/reference/vms.md
+++ b/docs/reference/vms.md
@@ -2,7 +2,7 @@
## Basic .vms data
-The reader supports VAMAS (.vms) files, the ISO standard data transfer format ([ISO 14976](https://www.iso.org/standard/24269.html)) for X-ray photoelectron spectroscopy. The data can be stored both in REGULAR (i.e, with an equally spaced energy axis) as well as IRREGULAR mode. The data was measured with and exported from[SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/).
+The reader supports VAMAS (.vms) files, the ISO standard data transfer format ([ISO 14976](https://www.iso.org/standard/24269.html)) for X-ray photoelectron spectroscopy. The data can be stored both in REGULAR (i.e, with an equally spaced energy axis) as well as IRREGULAR mode. The data was measured with and exported from [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/).
Example data for the SLE reader is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/vms).
From f85ed695d6e35a2f7a47a212214c63d8f97c774f Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 16:47:00 +0200
Subject: [PATCH 5/9] implement docs for software design principles
---
docs/explanation/implementation.md | 37 ++++++++++++++----------------
docs/index.md | 2 +-
mkdocs.yaml | 2 +-
3 files changed, 19 insertions(+), 22 deletions(-)
diff --git a/docs/explanation/implementation.md b/docs/explanation/implementation.md
index b5a4ba51..5966aed3 100644
--- a/docs/explanation/implementation.md
+++ b/docs/explanation/implementation.md
@@ -1,28 +1,25 @@
-# Implementation design
+# Purpose and aim of pynxtools-xps
+pynxtools-xps aims for the implementation of [FAIR principles of data stewardship](https://doi.org/10.1162/dint_r_00024) in photoelectron spectroscopy (PES). In many experimental fields, there has been a push towards such standardization and interoperability in recent years; however, there has been a distinct lack of such efforts in PES.
-
+- We do not consider that our work is complete (from the perspective of the idea in mind that a user can expect to drag-and-drop arbitrary content).
+- We consider ontology matching a team effort that can only be achieved with technology partners and scientists working together.
+- Our work is open to suggestions by the PES community, always realizing that just being able to read from a specific file alone is not solving the challenge that pynxtools-xps addresses.
+- We provide specific tangible examples of (meta)data semantic mapping for specific file formats that are frequently used in XPS. These include the main formats of the leading vendors of PES spectrometers.
+- The tool itself is built such that it is easily extendable.
+- The goal is to continuously grow the number of parsers available for different communities. We therefore encourage researchers and technology partners to get in contact in order to get started with standardization in NeXus and NOMAD.
diff --git a/docs/index.md b/docs/index.md
index 0dbf323f..fcb33941 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -41,7 +41,7 @@ How-to guides provide step-by-step instructions for a wide range of tasks, with
The explanation section provides background knowledge on the implementation design, how the data is structured, how data processing can be incorporated, how the integration works in NOMAD, and more.
-- [Implementation design](explanation/implementation.md)
+- [Design principles and implementation](explanation/implementation.md)
- [NXmpes and NXxps](explanation/appdefs.md)
- [How to map pieces of information to NeXus](explanation/contextualization.md)
diff --git a/mkdocs.yaml b/mkdocs.yaml
index 46a31704..46409d9f 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -19,7 +19,7 @@ nav:
- explanation/nomad_integration.md
- Reference:
- reference/specs.md
- - reference/scienta.md)
+ - reference/scienta.md
- reference/phi.md
- reference/vms.md
theme:
From d784fa311d1c6e4123d1c5c5d8273c6b3905d4e2 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 16:50:44 +0200
Subject: [PATCH 6/9] use main branch for mkdocs building
---
.github/workflows/build_docs.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml
index c798467c..116e8e5e 100644
--- a/.github/workflows/build_docs.yml
+++ b/.github/workflows/build_docs.yml
@@ -1,7 +1,7 @@
name: build_docs
on:
push:
- branches: ["*"]
+ branches: [main]
permissions:
contents: write
jobs:
From ae2501415e631fa11990d6830194b3f53e2992ad Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Tue, 30 Apr 2024 17:06:28 +0200
Subject: [PATCH 7/9] explain how mapping works
---
docs/explanation/contextualization.md | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/docs/explanation/contextualization.md b/docs/explanation/contextualization.md
index ef556a72..39d8410a 100644
--- a/docs/explanation/contextualization.md
+++ b/docs/explanation/contextualization.md
@@ -1,8 +1,9 @@
# How to map pieces of information to NeXus
-
\ No newline at end of file
+Upon parsing, the XPS reader uses the config file to map the (meta-)data to a *template* which follows the NeXus application definitions. It also takes metadata provided through additional means (i.e., an electronic lab notebook (ELN) file) to fill in missing required and recommended fields and attributes in the application definition that were not provided in the raw data files. It is this *template* variable from which core functions like *convert.py* of the pynxtools write the actual NeXus/HDF5 file. The latter tool is also referred to as the dataconverter of [pynxtools](https://github.com/FAIRmat-NFDI/pynxtools).
\ No newline at end of file
From e0e27ea1e26286f124ae198cadcd0d43e2d40c27 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 2 May 2024 11:14:01 +0200
Subject: [PATCH 8/9] add supported file formats to the main index in docs
---
docs/index.md | 14 +++++++++++++-
docs/reference/phi.md | 2 ++
docs/reference/scienta.md | 2 ++
docs/reference/specs.md | 2 ++
docs/reference/vms.md | 4 +++-
5 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/docs/index.md b/docs/index.md
index fcb33941..6f22d87c 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -13,7 +13,7 @@ pynxtools-xps solves the challenge of using heterogeneous and unfindable data fo
pynxtools-xps is useful for scientists from the XPS community that deal with heterogeneous data, for technology partners and data providers looking for ways to make their data FAIRer, and for research groups that want to organize their data using NeXus and NOMAD.
-
+
### Tutorial
@@ -54,6 +54,18 @@ The explanation section provides background knowledge on the implementation desi
Here you can learn which specific measurement setups and file formats from technology partners pynxtools-xps currently supports.
+The reader decides which data parser to use based on the file extension of the files provided. For the main XPS files, the following file extensions are supported:
+- .spe, .pro: [Phi MultiPak](https://www.phi.com/surface-analysis-equipment/genesis.html#software:multi-pak-data-reduction-software/) files, proprietary format of PHI Electronics
+- .sle: [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) files, proprietary format of SPECS GmbH (1 and v4)
+- .xml: SpecsLab 2 files, XML format from SPECS GmbH (v1.6)
+- .vms: VAMAS files, ISO standard data transfer format ([ISO 14976](https://www.iso.org/standard/24269.html)), both in regular and irregular format
+- .xy: SpecsLabProdigy export format in XY format (including all export settings)
+- .txt:
+ - exported by [Scienta Omicron](https://scientaomicron.com/en) instruments
+ - exported by [CasaXPS](https://www.casaxps.com/) analysis software
+
+You can find more information regarding the readers for data from different technology partners here:
+
- [Data exported by SPECS spectrometers](reference/specs.md)
- [Data exported by Scienta Omicron spectrometers](reference/scienta.md)
- [Data exported by Phi spectrometers](reference/phi.md)
diff --git a/docs/reference/phi.md b/docs/reference/phi.md
index d639ac1d..2500b7fb 100644
--- a/docs/reference/phi.md
+++ b/docs/reference/phi.md
@@ -4,6 +4,8 @@ The reader supports [Phi MultiPak](https://www.phi.com/surface-analysis-equipmen
+The reader for the Phi data can be found [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/phi).
+
Example data for this file format is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/phi).
The example conversion can be run with the following commands:
diff --git a/docs/reference/scienta.md b/docs/reference/scienta.md
index 902fec90..5dec72f2 100644
--- a/docs/reference/scienta.md
+++ b/docs/reference/scienta.md
@@ -4,6 +4,8 @@ The reader supports reading data exported as .txt from Scienta Omicron [Scienta
+The reader for the Scienta data can be found [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/scienta).
+
Example data is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/scienta).
The example conversion can be run with the following command.
diff --git a/docs/reference/specs.md b/docs/reference/specs.md
index ebf14cdf..4381a2af 100644
--- a/docs/reference/specs.md
+++ b/docs/reference/specs.md
@@ -8,6 +8,8 @@ The reader supports [SpecsLabProdigy](https://www.specs-group.com/nc/specs/produ
+The readers for the SPECS data can be found [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/specs).
+
Example data for the SLE reader is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/specs).
The example conversion can be run with the following command.
diff --git a/docs/reference/vms.md b/docs/reference/vms.md
index 597994dd..50991878 100644
--- a/docs/reference/vms.md
+++ b/docs/reference/vms.md
@@ -4,7 +4,9 @@
The reader supports VAMAS (.vms) files, the ISO standard data transfer format ([ISO 14976](https://www.iso.org/standard/24269.html)) for X-ray photoelectron spectroscopy. The data can be stored both in REGULAR (i.e, with an equally spaced energy axis) as well as IRREGULAR mode. The data was measured with and exported from [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/).
-Example data for the SLE reader is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/vms).
+The reader for the VAMAS format can be found [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/vms).
+
+Example data is available [here](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples/vms).
The example conversion for the REGULAR file can be run with the following command:
From 0d77330c47c795c0d42cf2aa8318f873d4258a09 Mon Sep 17 00:00:00 2001
From: Lukas Pielsticker <50139597+lukaspie@users.noreply.github.com>
Date: Thu, 2 May 2024 11:14:22 +0200
Subject: [PATCH 9/9] remove duplicate docs from main readme
---
README.md | 71 ++++---------------------------------------------------
1 file changed, 4 insertions(+), 67 deletions(-)
diff --git a/README.md b/README.md
index 12e2e4a1..4f0f2f52 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
# A reader for XPS data
-# Installation
+## Installation
It is recommended to use python 3.11 with a dedicated virtual environment for this package.
Learn how to manage [python versions](https://github.com/pyenv/pyenv) and
@@ -24,76 +24,13 @@ pip install pynxtools[xps]
for the latest development version.
-
-# Purpose
+## Purpose
This reader plugin for [`pynxtools`](https://github.com/FAIRmat-NFDI/pynxtools) is used to translate diverse file formats from the scientific community and technology partners
within the field of X-ray photoelectron spectroscopy into a standardized representation using the
[NeXus](https://www.nexusformat.org/) application definition [NXmpes](https://fairmat-nfdi.github.io/nexus_definitions/classes/contributed_definitions/NXmpes.html#nxmpes).
-## Supported file formats
-The reader decides which parser to use based on the file extension of the files provided. For the main XPS files, the following file extensions are supported:
-- .spe, .pro: [Phi MultiPak](https://www.phi.com/surface-analysis-equipment/genesis.html#software:multi-pak-data-reduction-software/) files, propietary format of PHI Electronics
-- .sle: [SpecsLabProdigy](https://www.specs-group.com/nc/specs/products/detail/prodigy/) files, propietary format of SPECS GmbH (1 and v4)
-- .xml: SpecsLab 2files, XML format from SPECS GmbH (v1.6)
-- .vms: VAMAS files, ISO standard data transfer format ([ISO 14976](https://www.iso.org/standard/24269.html)), both in regular and irregular format
-- .xy: SpecsLabProdigy export format in XY format (including all export settings)
-- .txt:
- - exported by [Scienta Omicron](https://scientaomicron.com/en) instruments
- - exported by [CasaXPS](https://www.casaxps.com/) analysis software
-
-We are continously working on adding parsers for other data formats and technology partners. If you would like to implement a parser for your data, feel free to get in contact.
-
-# Getting started
-An example script to run the XPS reader in `pynxtools`:
-```sh
- ! dataconverter \
---reader xps \
---nxdl NXmpes \
---input-file $ \
---input-file $ \
---output .nxs
-```
-Note that none of the supported file format have data/values for all required and recommended fields and attributes in NXmpes. In order for the validation step of the XPS reader to pass, you need to provide an ELN file that contains the missing values. Example raw and converted data can be found in [*pynxtools_xps/examples*](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/examples).
-
-
-# Contributing
-
-## Development install
-
-Install the package with its dependencies:
-
-```shell
-git clone https://github.com/FAIRmat-NFDI/pynxtools-xps.git \\
- --branch main \\
- --recursive pynxtools_xps
-cd pynxtools_xps
-python -m pip install --upgrade pip
-python -m pip install -e .
-python -m pip install -e ".[dev,consistency_with_pynxtools]"
-```
-
-There is also a [pre-commit hook](https://pre-commit.com/#intro) available
-which formats the code and checks the linting before actually commiting.
-It can be installed with
-```shell
-pre-commit install
-```
-from the root of this repository.
-
-## Development Notes
-The development process is modular so that new parsers can be added. The design logic is the following:
-1. First, [`XpsDataFileParser`](https://github.com/FAIRmat-NFDI/pynxtools-xps/blob/main/pynxtools_xps/file_parser.py#L36) selects the proper parser based on the file extensions of the provided files. It then calls a sub-parser that can read files with such extensions and calls the `parse_file` function of that reader. In addition, it selects a proper config file from
-the `config` subfolder.
-2. Afterwards, the NXmpes nxdl template is filled with the data in `XpsDataFileParser` using the [`config`](https://github.com/FAIRmat-NFDI/pynxtools-xps/tree/main/pynxtools_xps/config) file. Data that is not in the given main files can be added through the ELN file (and must be added for required fields in NXmpes).
-
-## Test this software
-
-Especially relevant for developers, there exists a basic test framework written in
-[pytest](https://docs.pytest.org/en/stable/) which can be used as follows:
-
-```shell
-python -m pytest -sv tests
-```
+## Docs
+Extensive documentation of this pynxtools plugin is available [here](https://fairmat-nfdi.github.io/pynxtools-xps/). You can find information about getting started, how-to guides, the supported file formats, how to get involved, and much more there.
## Contact person in FAIRmat for this reader
Lukas Pielsticker
\ No newline at end of file