From 8c2a8395c60ab4d1c1719d4e5e937e265a6fef0c Mon Sep 17 00:00:00 2001 From: Andrea Albino <95371554+aalbino2@users.noreply.github.com> Date: Wed, 7 Sep 2022 17:53:18 +0200 Subject: [PATCH] the line number of the yaml file is displayed in the error message (#34) * lineloader class * Line Tags in Error Messages! * pylint * support extends keyword in root level of yaml file * cleaning yaml2nxdl folder * pylint * tests implemented and PR comments solved * test_yaml2nxdl updated * test yaml2nxdl failing * python version changed in pylint.yml --- .github/workflows/pylint.yml | 4 +- .../yaml2nxdl/NXellipsometry-docCheck.yaml | 588 -------------- .../tools/yaml2nxdl/NXtest_links.nxdl.xml | 11 - nexusparser/tools/yaml2nxdl/NXtest_links.yml | 8 - .../Ref_NXellipsometry-docCheck.nxdl.xml | 755 ------------------ nexusparser/tools/yaml2nxdl/yaml2nxdl.py | 25 +- .../yaml2nxdl/yaml2nxdl_forward_tools.py | 263 +++--- .../yaml2nxdl_test_data/NXattributes.yml | 4 +- .../yaml2nxdl_test_data/NXfilelineError1.yml | 31 + .../yaml2nxdl_test_data/NXfilelineError2.yml | 31 + .../yaml2nxdl_test_data/NXfilelineError3.yml | 31 + .../Ref_NXattributes.nxdl.xml | 12 +- tests/tools/yaml2nxdl/test_yml2nxdl.py | 47 +- 13 files changed, 338 insertions(+), 1472 deletions(-) delete mode 100644 nexusparser/tools/yaml2nxdl/NXellipsometry-docCheck.yaml delete mode 100644 nexusparser/tools/yaml2nxdl/NXtest_links.nxdl.xml delete mode 100644 nexusparser/tools/yaml2nxdl/NXtest_links.yml delete mode 100644 nexusparser/tools/yaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml create mode 100644 tests/data/tools/yaml2nxdl_test_data/NXfilelineError1.yml create mode 100644 tests/data/tools/yaml2nxdl_test_data/NXfilelineError2.yml create mode 100644 tests/data/tools/yaml2nxdl_test_data/NXfilelineError3.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index e6df4953c..ca4e77aba 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -10,10 +10,10 @@ jobs: - uses: actions/checkout@v2 with: lfs: true - - name: Set up Python 3.7 + - name: Set up Python 3.8 uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.8 - name: Install dependencies run: | git submodule sync --recursive diff --git a/nexusparser/tools/yaml2nxdl/NXellipsometry-docCheck.yaml b/nexusparser/tools/yaml2nxdl/NXellipsometry-docCheck.yaml deleted file mode 100644 index a6ee66566..000000000 --- a/nexusparser/tools/yaml2nxdl/NXellipsometry-docCheck.yaml +++ /dev/null @@ -1,588 +0,0 @@ -#01/2022 -#a draft version of a NeXus application definition for ellipsometry - -# the document has the following main elements: -# instrument used and is characteristics -# measured data, the discription of the sample and what was measured about it -# derived parameters: extra parameters derived in the measurement software - -# one would need a Class/ separate entry for the sample description which can link to and should contain a full description of the composition. -# A free description as text might be not enough or unclear. Especially if one deals with many layer structured samples and optically anisotropic -# samples, like triclinic one -# -# Comments are moved to the Development_notes.md file - - -category: application -doc: | - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in: - - H. Fujiwara, Spectroscopic ellipsometry: principles and applications, John Wiley & Sons, 2007. - - R. M. A. Azzam and N. M. 
Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. - - Open acces sources: - - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ - -symbols: - # normal quoated text is flattened out in YAML - doc: "Variables used throughout the document, e.g. dimensions and important parameters" - N_wavelength: Size of the energy / wavelength vector used - N_variables: How many variables are saved in a measurement (e.g. Psi and Delta, Mueller matrix) - N_angles: Number of incident angles used - N_p1: Number of sample parameters scanned - N_time: Number of time points measured - # we can go on, but this is already an at least 4 dimensional data set to work with - -(NXellipsometry): - (NXentry): - # exists: required -- is the default - # The first line of this doc is a summary; second line needs the indentation - # to be taken as an empty line, part of the doc! - doc: | - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in: - - H. Fujiwara, Spectroscopic ellipsometry: principles and applications, John Wiley & Sons, 2007. - - R. M. A. Azzam and N. M. Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. - - Open acces sources: - - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ - - definition: - doc: "An application definition for ellipsometry." - \@version: - doc: "Version number to identify which definition of this application definition - was used for this entry/data." - # New approach of NIAC: version controlled application definitions. - # taken this from ARPES - \@url: - doc: "URL where to find further material (documentation, examples) relevant to - the application definition" - enumeration: [NXellipsometry] - - # definition: - # doc: "FAIRmat-specific candidate proposal for an application definition - # exemplifying ellipsometry. For example: - # https://gitlab.mpcdf.mpg.de/nomad-lab/areab-appdef/-/blob/spectroscopic-ellipsometry-consolidation/optical_spectroscopy" - # #exists: optional - # \@version: - # doc: "Ideally version with build number are commit hash of the application - # definition. If not available a free-text description." - # \@url: - # doc: "URL where to find further material (documentation, examples) relevant - # to the application definition" - - experiment_identifier: - doc: "Unique identifier of the experiment, such as a (globally persistent) unique identifier. i) The identifier is usually defined by the facility or principle investigator. ii) The identifier enables to link experiments to e.g. proposals." - - experiment_description: - exists: recommended - doc: "A free-text description of the experiment. What is the aim of the experiment? - The general procedure." - - start_time(NX_DATE_TIME): - doc: "Start time of the experiment. UTC offset should be specified." - - program: - exists: optional - doc: "Commercial or otherwise defined given name to the program that was used - to generate the results file(s) with measured data and metadata (or a link - to the instrument software)." 
- \@version: - doc: "Either version with build number, commit hash, or description of a - (online) repository where the source code of the program and build - instructions can be found so that the program can be configured in such - a way that result files can be created ideally in a deterministic manner." - \@url: - doc: "Website of the software." - - operator(NXuser): - # we want to have several possible values, but one is required - exists: [min, 1] - doc: "Contact information of at least the user of the instrument or the - investigator who performed this experiment. - Adding multiple users if relevant is recommended." - name: - doc: "Name of the user." - affiliation: - doc: "Name of the affiliation of the user at the point in time when the - experiment was performed." - address: #CE: added doc strings - doc: "Full address (street, street number, ZIP, city, country) of the user's affiliation." - email: - doc: "Email address of the user." - orcid: - exists: recommended - doc: "Author ID defined by https://orcid.org/." - telephone_number: - exists: recommended - doc: "Official telephone number of the user." - - - (NXinstrument): - doc: "General properties of the ellipsometry equipment" - model: - doc: The name of the instrument - \@version: - doc: "The used version of the hardware if available. - If not a commercial instrument use date of completion of the hardware." - company: - exists: optional - doc: "Name of the company which build the instrument" - construction_year(NX_DATE_TIME): - exists: optional - doc: "ISO8601 date when the instrument was constructed. UTC offset should be specifiec." - software: - doc: "Name (e.g. commercial) of the software that was used for the measurement" - \@version: - doc: "Version and build number or commit hash of the software source code" - \@url: - doc: "Website of the software." - - light_source: - doc: "Specify the used light source. Multiple selection possible." - enumeration: [UV light, quartz tungsten halogen lamp, xenon arc lamp, deuterium lamp, silicon carbide globar, other] - other_light_source: - exists: optional - doc: "If you specified 'other' as light source type, please write down what it is." - focussing_probes(NX_BOOLEAN): - doc: "Were focussing probes (lenses) used or not?" - data_correction(NX_BOOLEAN): - exists: optional - doc: "Were the recorded data corrected by the window effects of the lenses or not?" - angular_spread(NX_NUMBER): - exists: optional - doc: "Specify the angular spread caused by the focussing probes" - unit: NX_ANGLE - ellipsometry_type: - doc: "What type of ellipsometry was used? See Fujiwara Table 4.2" - enumeration: [rotating analyzer, rotating analyzer with analyzer compensator, - rotating analyzer with polarizer compensator, - rotating polarizer, rotating compensator on polarizer side, - rotating compensator on analyzer side, modulator on polarizer side, - modulator on analyzer side, dual compensator, phase modulation, - imaging ellipsometry, null ellipsometry] - - calibration_status(NX_DATE_TIME): - doc: "Was a calibration done. If yes, when was it done?" - enumeration: [calibration time provided, no calibration, within 1 hour, within 1 day, within 1 week] - - calibration(NXsubentry): - exists: recommended - doc: "Ellipsometers require regular calibration to adjust the hardware - parameters for proper zero values and background light compensation." 
- calibration_time(NX_DATE_TIME): - exists: optional - doc: "If calibtration status is 'calibration time provided', specify the ISO8601 datum when calibration was - last performed before this measurement. UTC offset should be specified." - - calibration_data(NXsubentry): - doc: | - Arrays which provide the measured calibration data. - Multiple sets are possible, e.g. Psi and delta measured on an - e.g. silicon calibration waver, and the straight-through data. - - We - recommend to - - provide data that is measured under the same settings - - as the measurement was performed, that is if Psi and delta are measured - for your data, also provide Psi and delta here. - And use the same wavelenghts as there." - - calibration_data_type: - doc: "What data was recorded for the calibration, - The number of variables (N_variables) have to be set to the number of - provided data columns accordingly, e.g. psi/delta -> N_variables= 2, - Jones vector: N_variables = 4, Mueller martix -> N_variables= 16, etc." - enumeration: [psi/delta, tan(psi)/cos(delta), Jones matrix, Mueller matrix, not provided] - calibration_angle_of_incidence(NX_NUMBER): - doc: "angle(s) of incidence used during the calibration measurement - (excluding straight through mode)" - unit: NX_ANGLE - dimensions: - rank: 1 - dim: [[1, N_calibration_angles]] - - calibration_wavelength(NX_NUMBER): - doc: "The wavelength or equivalent values (which are inter-convertible). - The importer should convert all to one unit, and make the others accessible. - Historically, energy is used in eV, but for visible spectroscopy wavelength - is more common, for IR wave numbers in 1/cm units. - - Possibly use the same type of data as for the measurement!" - dimensions: - rank: 1 - dim: [[1, N_calibration_wavelength]] - - calibration_data(NX_NUMBER): - doc: "Calibration is performed on a reference surface (usually - silicon wafer with well defined oxide layer) at a number of - angles, then in a straight through mode (transmission in air)." - unit: NX_UNITLESS - dimensions: - rank: 3 - dim: [[3, N_calibration_angles+1], [2, N_variables], [1, N_calibration_wavelength]] - - calibration_sample(NX_CHAR): - doc: "Free-text to describe which sample was used for calibration, - e.g. silicon wafer with 25 nm thermal oxide layer." - - angle_of_incidence(NX_NUMBER): - doc: "Incident angle of the beam vs. the normal of the bottom - reflective (substrate) surface in the sample" - unit: NX_ANGLE - dimensions: - rank: 1 - dim: [[1, N_angles]] - - stage(NXsubentry): - # this needs at least a contributed class where you collect the - # members we could generalize NXem_stage to NXstage - # It would be nice to make it a base class - doc: "Sample stage, holding the sample at a specific position in X,Y,Z - (Cartesian) coordinate system and at an orientation defined - by three Euler angles (alpha, beta, gamma). - The stage may be motorized or manual, special for liquids or gas environment." - enumeration: [manual stage, scanning stage, liquid stage, gas cell, cryostat] - description: - doc: "A free-text field to provide information about the stage." - exists: recommended - (NXtransformations): - exists: recommended - doc: "The stage coordinate system vs. the incident beam. - The Z-axis of the stage is considered to point along the - normal of the substrate (bottom reflecting surface) from the stage - towards the general direction of the light source. 
The beam - comes with angle of incidence towards this Z-axis, but in opposite - direction, thus they are connected with a rotation of - 180 - angle of incidence (in degrees). - - This transformation brings us from the NEXUS coordinates to the stage coordinates. - - Then provide the set of translations (if there are any). These all - have a vector defining their relative direction in the current - coordinate system. (This current coordinate system changes with - every transformation if you set the parameter 'depends' to the - name of the previous step.) - - Last, provide the rotations of the sample" - - alternative: - exists: optional - doc: "If there is no motorized stage, we should at least qualify - where the beam hits the sample and in what direction the sample stands - in a free-text description, e.g. 'center of sample, long edge parallel to - plane of incidence'." - - - window(NXaperture): - exists: optional - doc: "For environmental measurements, the environment (liquid, vapor, - vacuum etc.) is enclosed in a cell or cryostat, which has windows - both in the direction of the source and the detector (looking from - the sample). These windows also add a phase shift to the light - altering the measured signal. This shift has to be corrected based - on measuring a known sample in the environmental cell." - - material(NX_CHAR): - doc: The material of the window - # add enumeration: [quartzglass, diamond, etc.]? - - thickness(NX_NUMBER): - doc: Thickness of the window - unit: NX_LENGTH - - orientation_angle(NX_NUMBER): - doc: "Angle of the window normal (outer) vs. the substrate normal - (similar to the angle of incidence)." - unit: NX_ANGLE - - reference_data(NXsubentry): - # NXdata is a view of data, here we have a set of information, use subentry - doc: "Recorded data that can be used to calculate the window effect. - Typically this is the substrate (e.g. silicon with thermal oxide layer) - in air without window and in a known medium with the window." - - reference_sample: - doc: "What sample was used to estimate the window effect." - - reference_wavelength(NX_NUMBER): - doc: "Use the same wavelengths at which all other measurements are recorded" - unit: NX_LENGTH - dimensions: - rank: 1 - dim: [[1, N_wavelength]] - - data(NX_NUMBER): #Should we make this recommended instead of required, if window calibration data - # are not available but the user would still like to specify the window properties? - doc: "Recorded data of a reference surface with and without window / medium." - unit: NX_UNITLESS - #can one specify the dimensions of these calibration data? - dimensions: - rank: 4 - dim: [[4,2], [3, N_angles], [2, N_variables], [1, N_wavelength]] - - (NXdetector): - doc: "Which type of detector was used, and what is known about it? - A detector can be a photomultiplier (PMT), a CCD in a camera, - an array in a spectrometer. If so, the whole detector unit goes in here." - - detector_type: - doc: "What kind of detector module is used, e.g. CCD-spectrometer, - CCD camera, PMT, photodiode, etc." - enumeration: [PMT, photodiode, avalanche diode, CCD camera, CCD spectrometer, other] - - other_detector: - exists: optional - doc: "If you specified 'other' as detector type, please write down what it is." - - integration_time(NX_NUMBER): - doc: "Integration time for the measurement. Single number or array if it was varied." - unit: NX_TIME - - revolution(NX_NUMBER): - exists: optional - doc: "Define how many rotations of the rotating element were taken into - account per spectrum." 
- unit: NX_ANY - - rotating_element: - doc: "Define which elements rotates, e.g. polarizer or analyzer." - enumeration: [polarizer (source side), analyzer (detector side), compensator (source side), compensator (detector side)] - - fixed_revolution(NX_NUMBER): - exists: optional - doc: "rotation rate, if the revolution does not change during the measurement." - unit: NX_FREQUENCY - - variable_revolution(NX_NUMBER): - exists: optional - doc: "Specify maximum and minimum values for the revolution." - dimensions: - rank: 1 - dim: [[1, 2]] - - (NXsample): - doc: "Properties of the sample, its history, the sample environment and - experimental conditions (e.g. surrounding medium, temperature, pressure etc.), - along with the data (data type, wavelength array, measured data)." - atom_types: - doc: "Use Hill's system for listing elements of the periodic table - which are inside or attached to the surface of the specimen - and thus relevant from a scientific point. The purpose of this field - is to allow materials database to parse the relevant elements without - having to interpret the sample history or other fields." - sample_name: - doc: "Descriptive name of the sample" - - sample_history: - doc: "Ideally, a reference to the location or a unique (globally persistent) - identifier (e.g.) of e.g. another file which gives as many as possible - details of the material, its microstructure, and its thermo-chemo-mechanical - processing/preparation history. In the case that such a detailed - history of the sample is not available, use this field as a free-text - description to specify details of the sample and its preparation." - - preparation_date(NX_DATE_TIME): - exists: recommended - doc: "ISO 8601 date with time zone specified. UTC offset should be specifiec." - - layer_structure: - doc: "Qualitative description of the layer structure for the sample. - For example: Si/native oxide/thermal oxide/polymer/peptide" - - # orientation_matrix(n_comp, 3, 3): is part of the base class - # documentation is of the original NEXUS class - # DO we really need this mandatory, or just leave it? - - - data_identifier(NX_NUMBER): - doc: "An identifier to correlate data to the experimental conditions, - if several were used in this measurement; - typically an index of 0 - N" - #what about uniqueness of the identifier? - # -- append it to the experiment identifier... - - data_type: - doc: "Select which type of data was recorded, for example Psi and Delta - (see: https://en.wikipedia.org/wiki/Ellipsometry#Data_acquisition). - It is possible to have multiple selections. Data types may also be converted - to each other, e.g. a Mueller matrix contains N,C,S data as well. - This selection defines how many columns (N_variables) are stored in the - data array." - enumeration: [psi / delta, tan(psi)/cos(delta), Mueller matrix, Jones matrix, N/C/S, raw data] - - # the above definition and documentation defines N_variables, we do not need this here anymore - # number_of_variables(NX_UINT): - # doc: "specify the number of variables stored, e.g. psi, delta and their errors are 4 (this can be also automated, based on the provided data table)" - # #we should use maybe in the future again the concepts of symbols - - wavelength(NX_NUMBER): - doc: "Wavelength value(s) used for the measurement.\n - An array of 1 or more elements. 
Length defines N_wavelength" - unit: NX_LENGTH - dimensions: - rank: 1 - dim: [[1, N_wavelength]] - - measured_data(NX_NUMBER): - doc: "Resulting data from the measurement, described by data type.\n - Minimum two columns containing Psi and delta, or for the normalized Mueller matrix, - it may be 16 (or 15 if 1,1 is all 1)." - dimensions: - rank: 5 - dim: [[5, N_time], [4, N_p1], [3, N_angles], [2, N_variables], [1, N_wavelength]] - - data_error(NX_NUMBER): - doc: "Specified uncertainties (errors) of the data described by data type. - The structure is the same as for the measured data." - exists: recommended - dimensions: - rank: 5 - dim: [[5, N_time], [4, N_p1], [3, N_angles], [2, N_variables], [1, N_wavelength]] - - time_points(NX_NUMBER): # can this also be shifted to varied parameters? - exists: optional - doc: "An array of relative time points if a time series was recorded" - unit: NX_TIME - - medium: - # we need this or the next one - doc: "Describe what was the medium above or around the sample. - The common model is built up from substrate to the medium on the - other side. Both boundaries are assumed infinite in the model. - Here define the name of the material (e.g. water, air, etc.)." - - medium_refractive_indices(NX_NUMBER): #CE: changed NX_COMPLEX to NX_NUMBER.... - exists: optional - doc: "Array of pairs of complex refractive indices of the medium for - every measured wavelength. - Only necessary if the measurement was performed not in air, or - something very well known, e.g. high purity water. - Specify the complex refractive index: n + ik" - unit: NX_UNITLESS - dimensions: - rank: 1 - dim: [[1, N_wavelength]] - - environment_conditions: - exists: optional - doc: "External parameters that have influenced the sample." - - number_of_runs(NX_UINT): - exists: optional - doc: "How many measurements were done varying the parameters? - This forms an extra dimension beyond incident angle, time points and - energy / wavelength (this is the length of the 4th dimension of the data). - Defaults to 1." - unit: NX_DIMENSIONLESS - - varied_parameters: - exists: optional - doc: "Indicates which parameter was changed. Its definition must exist below. - The specified variable has to be number_of_runs long, - providing the parameters for each data set." - enumeration: [optical excitation, voltage, temperature, pH, stress, stage positions] - - optical_excitation(NXsubentry): - exists: optional - #is a boolean sufficient as a storage container? - doc: "Was the sample modified using an optical source? Describe in this group - the parameters of the optical excitation used." - excitation_source: - doc: "Specify the source for the external excitation" - - excitation_wavelength(NX_NUMBER): - doc: "Wavelength value(s) or the range used for excitation.\n - In cases of continuous laser radiation a value or a set of values - may do but for other illumination types, such as pulsed lasers, - or lamps, a range may describe the source better." - unit: NX_LENGTH - broadening(NX_NUMBER): - exists: optional - doc: "Specify the FWHM of the excitation" - unit: NX_LENGTH - excitation_type: - doc: "CW or pulsed excitation" - enumeration: [cw, pulsed] - pulse_length(NX_NUMBER): - exists: optional - doc: "Duration of one laser pulse." - unit: NX_TIME - repetition_rate(NX_NUMBER): - exists: optional - doc: "Repetition rate of the laser." - unit: NX_FREQUENCY - excitation_duration(NX_TIME): - exists: optional - doc: "How long was the sample excited." 
- pulse_energy(NX_NUMBER): - exists: optional - doc: "The integrated energy of light pulse." - unit: NX_ENERGY - excitation_power(NX_NUMBER): - exists: optional - doc: "The power of one laser pulse." - unit: NX_ENERGY - - voltage(NX_NUMBER): - exists: optional - doc: "Specify the voltage if the spectra were taken under bias" - # ... continue to explain what the user should do then - unit: NX_VOLTAGE - temperature(NX_NUMBER): - exists: optional - doc: "Temperature of the sample (sample holder, medium)" - unit: NX_TEMPERATURE - pH(NX_NUMBER): - exists: optional - doc: "pH of medium (measured or set)" - unit: NX_UNITLESS - pressure(NX_NUMBER): - exists: optional - doc: "Pressure of the environment of the sample." - unit: NX_PRESSURE - - # TO DO: stress and strain - # - # stress(NX_NUMBER): - # exists: optional - # # -- for this, we can take the original stress_field from NX_sample... - # doc: "Mechanical stress exerted on the sample." - # unit: NX_PRESSURE - # - # stress_orientation(NX_NUMBER): - # exists: optional - # doc: "Euler angles of stress relative to sample in the stage coordinate system, - # see instrument/stage translation part, axes X,Y,Z." - # unit: NX_ANGLE - # dimensions: - # rank: 1 - # dim: [[1, 3]] - # UNCLEAR, which Euler angles, i.e. according to which convention how mapping to reference coordinate system, - # why not use stress or strain tensor applied on sample? - - # derived parameters - derived_parameters(NXprocess): - exists: optional - doc: "What parameters are derived from the above data." - depolarization(NX_NUMBER): - exists: optional - doc: "Light loss due to depolarization as a value in [0-1]." - unit: NX_UNITLESS - - plot(NXdata): - exists: optional - doc: "A default view of the data, in this case Psi vs. wavelength and the angles of incidence. - If Psi does not exist, use other Müller matrix elements, such as N, C and S." - \@axes: - doc: "We recommend to use wavelength as a default attribute, but it can be - replaced in the case of not full spectral ellipsometry to any suitable - parameter along the X-axis." diff --git a/nexusparser/tools/yaml2nxdl/NXtest_links.nxdl.xml b/nexusparser/tools/yaml2nxdl/NXtest_links.nxdl.xml deleted file mode 100644 index 4e7b1fe49..000000000 --- a/nexusparser/tools/yaml2nxdl/NXtest_links.nxdl.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - Test case for verifying that the parser can handle links correctly. - - - - - - diff --git a/nexusparser/tools/yaml2nxdl/NXtest_links.yml b/nexusparser/tools/yaml2nxdl/NXtest_links.yml deleted file mode 100644 index 4bd6d5c36..000000000 --- a/nexusparser/tools/yaml2nxdl/NXtest_links.yml +++ /dev/null @@ -1,8 +0,0 @@ -category: base -doc: Test case for verifying that the parser can handle links correctly. -(NXentry): - (NXdata): - polar_angle(link): - target: here1 - target_angle(link): - target: here2 diff --git a/nexusparser/tools/yaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml b/nexusparser/tools/yaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml deleted file mode 100644 index bda484d99..000000000 --- a/nexusparser/tools/yaml2nxdl/Ref_NXellipsometry-docCheck.nxdl.xml +++ /dev/null @@ -1,755 +0,0 @@ - - - - - Ellipsometry, complex systems, up to variable angle spectroscopy. - Information on ellipsometry is provided, e.g. in: - H. Fujiwara, Spectroscopic - ellipsometry: principles and applications, John Wiley & Sons, 2007. - R. M. A. Azzam and - N. M. Bashara, Ellipsometry and Polarized Light, North-Holland Publishing Company, 1977. - - H. G. Tompkins and E. A. 
Irene, Handbook of Ellipsometry, William Andrew, 2005. - Open acces sources: - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ - - - - Variables used throughout the document, e.g. dimensions and important - parameters - - - - Size of the energy / wavelength vector used - - - - - How many variables are saved in a measurement (e.g. Psi and Delta, - Mueller matrix) - - - - - Number of incident angles used - - - - - Number of sample parameters scanned - - - - - Number of time points measured - - - - - - Ellipsometry, complex systems, up to variable angle spectroscopy. - - Information on ellipsometry is provided, e.g. in: - - H. Fujiwara, Spectroscopic ellipsometry: principles and applications, John Wiley & Sons, - 2007. - - R. M. A. Azzam and N. M. Bashara, Ellipsometry and Polarized Light, North-Holland - Publishing Company, 1977. - - H. G. Tompkins and E. A. Irene, Handbook of Ellipsometry, William Andrew, 2005. - - Open acces sources: - - https://www.angstromadvanced.com/resource.asp - - https://pypolar.readthedocs.io/en/latest/ - - - - - An application definition for ellipsometry. - - - - Version number to identify which definition of this application definition was used for - this entry/data. - - - - - URL where to find further material (documentation, examples) relevant to the application - definition - - - - - - - - - Unique identifier of the experiment, such as a (globally persistent) unique identifier. i) - The identifier is usually defined by the facility or principle investigator. ii) The - identifier enables to link experiments to e.g. proposals. - - - - - A free-text description of the experiment. What is the aim of the experiment? The general - procedure. - - - - - Start time of the experiment. UTC offset should be specified. - - - - - Commercial or otherwise defined given name to the program that was used to generate the - results file(s) with measured data and metadata (or a link to the instrument software). - - - - Either version with build number, commit hash, or description of a (online) repository - where the source code of the program and build instructions can be found so that the - program can be configured in such a way that result files can be created ideally in a - deterministic manner. - - - - - Website of the software. - - - - - - Contact information of at least the user of the instrument or the investigator who - performed this experiment. Adding multiple users if relevant is recommended. - - - - Name of the user. - - - - - Name of the affiliation of the user at the point in time when the experiment was - performed. - - - - - Full address (street, street number, ZIP, city, country) of the user's affiliation. - - - - - Email address of the user. - - - - - Author ID defined by https://orcid.org/. - - - - - Official telephone number of the user. - - - - - - General properties of the ellipsometry equipment - - - - The name of the instrument - - - - The used version of the hardware if available. If not a commercial instrument use date of - completion of the hardware. - - - - - - Name of the company which build the instrument - - - - - ISO8601 date when the instrument was constructed. UTC offset should be specifiec. - - - - - Name (e.g. commercial) of the software that was used for the measurement - - - - Version and build number or commit hash of the software source code - - - - - Website of the software. - - - - - - Specify the used light source. Multiple selection possible. 
- - - - - - - - - - - - - If you specified 'other' as light source type, please write down what it is. - - - - - Were focussing probes (lenses) used or not? - - - - - Were the recorded data corrected by the window effects of the lenses or not? - - - - - Specify the angular spread caused by the focussing probes - - - - - What type of ellipsometry was used? See Fujiwara Table 4.2 - - - - - - - - - - - - - - - - - - - Was a calibration done. If yes, when was it done? - - - - - - - - - - - - Ellipsometers require regular calibration to adjust the hardware parameters for proper - zero values and background light compensation. - - - - If calibtration status is 'calibration time provided', specify the ISO8601 datum when - calibration was last performed before this measurement. UTC offset should be specified. - - - - - Arrays which provide the measured calibration data. - Multiple sets are possible, e.g. Psi and delta measured on an - e.g. silicon calibration waver, and the straight-through data. - - We - recommend to - - provide data that is measured under the same settings - - as the measurement was performed, that is if Psi and delta are measured - for your data, also provide Psi and delta here. - And use the same wavelenghts as there." - - - - - What data was recorded for the calibration, The number of variables (N_variables) have to - be set to the number of provided data columns accordingly, e.g. psi/delta -> N_variables= - 2, Jones vector: N_variables = 4, Mueller martix -> N_variables= 16, etc. - - - - - - - - - - - - angle(s) of incidence used during the calibration measurement (excluding straight through - mode) - - - - - - - - The wavelength or equivalent values (which are inter-convertible). The importer should - convert all to one unit, and make the others accessible. Historically, energy is used in - eV, but for visible spectroscopy wavelength is more common, for IR wave numbers in 1/cm - units. - Possibly use the same type of data as for the measurement! - - - - - - - - Calibration is performed on a reference surface (usually silicon wafer with well defined - oxide layer) at a number of angles, then in a straight through mode (transmission in air). - - - - - - - - - - - Free-text to describe which sample was used for calibration, e.g. silicon wafer with 25 nm - thermal oxide layer. - - - - - - Incident angle of the beam vs. the normal of the bottom reflective (substrate) surface in - the sample - - - - - - - - Sample stage, holding the sample at a specific position in X,Y,Z (Cartesian) coordinate - system and at an orientation defined by three Euler angles (alpha, beta, gamma). The stage - may be motorized or manual, special for liquids or gas environment. - - - - - - - - - - - A free-text field to provide information about the stage. - - - - - The stage coordinate system vs. the incident beam. The Z-axis of the stage is considered - to point along the normal of the substrate (bottom reflecting surface) from the stage - towards the general direction of the light source. The beam comes with angle of incidence - towards this Z-axis, but in opposite direction, thus they are connected with a rotation of - 180 - angle of incidence (in degrees). - This transformation brings us from the NEXUS coordinates to the stage coordinates. - Then provide the set of translations (if there are any). These all have a vector defining - their relative direction in the current coordinate system. 
(This current coordinate system - changes with every transformation if you set the parameter 'depends' to the name of the - previous step.) - Last, provide the rotations of the sample - - - - If there is no motorized stage, we should at least qualify where the beam hits the sample - and in what direction the sample stands in a free-text description, e.g. 'center of - sample, long edge parallel to plane of incidence'. - - - - - - - For environmental measurements, the environment (liquid, vapor, vacuum etc.) is enclosed - in a cell or cryostat, which has windows both in the direction of the source and the - detector (looking from the sample). These windows also add a phase shift to the light - altering the measured signal. This shift has to be corrected based on measuring a known - sample in the environmental cell. - - - - The material of the window - - - - - Thickness of the window - - - - - Angle of the window normal (outer) vs. the substrate normal (similar to the angle of - incidence). - - - - - Recorded data that can be used to calculate the window effect. Typically this is the - substrate (e.g. silicon with thermal oxide layer) in air without window and in a known - medium with the window. - - - - What sample was used to estimate the window effect. - - - - - Use the same wavelengths at which all other measurements are recorded - - - - - - - - Recorded data of a reference surface with and without window / medium. - - - - - - - - - - - - - Which type of detector was used, and what is known about it? A detector can be a - photomultiplier (PMT), a CCD in a camera, an array in a spectrometer. If so, the whole - detector unit goes in here. - - - - What kind of detector module is used, e.g. CCD-spectrometer, CCD camera, PMT, photodiode, - etc. - - - - - - - - - - - - - If you specified 'other' as detector type, please write down what it is. - - - - - Integration time for the measurement. Single number or array if it was varied. - - - - - Define how many rotations of the rotating element were taken into account per spectrum. - - - - - Define which elements rotates, e.g. polarizer or analyzer. - - - - - - - - - - - rotation rate, if the revolution does not change during the measurement. - - - - - Specify maximum and minimum values for the revolution. - - - - - - - - - - Properties of the sample, its history, the sample environment and experimental conditions - (e.g. surrounding medium, temperature, pressure etc.), along with the data (data type, - wavelength array, measured data). - - - - Use Hill's system for listing elements of the periodic table which are inside or attached - to the surface of the specimen and thus relevant from a scientific point. The purpose of - this field is to allow materials database to parse the relevant elements without having to - interpret the sample history or other fields. - - - - - Descriptive name of the sample - - - - - Ideally, a reference to the location or a unique (globally persistent) identifier (e.g.) - of e.g. another file which gives as many as possible details of the material, its - microstructure, and its thermo-chemo-mechanical processing/preparation history. In the - case that such a detailed history of the sample is not available, use this field as a - free-text description to specify details of the sample and its preparation. - - - - - ISO 8601 date with time zone specified. UTC offset should be specifiec. - - - - - Qualitative description of the layer structure for the sample. 
For example: Si/native - oxide/thermal oxide/polymer/peptide - - - - - An identifier to correlate data to the experimental conditions, if several were used in - this measurement; typically an index of 0 - N - - - - - Select which type of data was recorded, for example Psi and Delta (see: - https://en.wikipedia.org/wiki/Ellipsometry#Data_acquisition). It is possible to have - multiple selections. Data types may also be converted to each other, e.g. a Mueller matrix - contains N,C,S data as well. This selection defines how many columns (N_variables) are - stored in the data array. - - - - - - - - - - - - - Wavelength value(s) used for the measurement. - An array of 1 or more elements. Length defines N_wavelength - - - - - - - - Resulting data from the measurement, described by data type. - Minimum two columns containing Psi and delta, or for the normalized Mueller matrix, it - may be 16 (or 15 if 1,1 is all 1). - - - - - - - - - - - - Specified uncertainties (errors) of the data described by data type. The structure is the - same as for the measured data. - - - - - - - - - - - - An array of relative time points if a time series was recorded - - - - - Describe what was the medium above or around the sample. The common model is built up from - substrate to the medium on the other side. Both boundaries are assumed infinite in the - model. Here define the name of the material (e.g. water, air, etc.). - - - - - Array of pairs of complex refractive indices of the medium for every measured wavelength. - Only necessary if the measurement was performed not in air, or something very well known, - e.g. high purity water. Specify the complex refractive index: n + ik - - - - - - - - External parameters that have influenced the sample. - - - - - How many measurements were done varying the parameters? This forms an extra dimension - beyond incident angle, time points and energy / wavelength (this is the length of the 4th - dimension of the data). Defaults to 1. - - - - - Indicates which parameter was changed. Its definition must exist below. The specified - variable has to be number_of_runs long, providing the parameters for each data set. - - - - - - - - - - - - - Was the sample modified using an optical source? Describe in this group the parameters of - the optical excitation used. - - - - Specify the source for the external excitation - - - - - Wavelength value(s) or the range used for excitation. - In cases of continuous laser radiation a value or a set of values may do but for other - illumination types, such as pulsed lasers, or lamps, a range may describe the source - better. - - - - - Specify the FWHM of the excitation - - - - - CW or pulsed excitation - - - - - - - - - Duration of one laser pulse. - - - - - Repetition rate of the laser. - - - - - How long was the sample excited. - - - - - The integrated energy of light pulse. - - - - - The power of one laser pulse. - - - - - - Specify the voltage if the spectra were taken under bias - - - - - Temperature of the sample (sample holder, medium) - - - - - pH of medium (measured or set) - - - - - Pressure of the environment of the sample. - - - - - - What parameters are derived from the above data. - - - - Light loss due to depolarization as a value in [0-1]. - - - - - - A default view of the data, in this case Psi vs. wavelength and the angles of incidence. - If Psi does not exist, use other Müller matrix elements, such as N, C and S. 
- - - - We recommend to use wavelength as a default attribute, but it can be replaced in the case - of not full spectral ellipsometry to any suitable parameter along the X-axis. - - - - - diff --git a/nexusparser/tools/yaml2nxdl/yaml2nxdl.py b/nexusparser/tools/yaml2nxdl/yaml2nxdl.py index 7bae88c1a..62156bbef 100755 --- a/nexusparser/tools/yaml2nxdl/yaml2nxdl.py +++ b/nexusparser/tools/yaml2nxdl/yaml2nxdl.py @@ -98,17 +98,25 @@ def yaml2nxdl(input_file: str, verbose: bool): application and base are valid categories!' assert 'doc' in yml_appdef.keys(), 'Required root-level keyword doc is missing!' - xml_root.set('extends', 'NXobject') xml_root.set('type', 'group') + + if 'extends' in yml_appdef.keys(): + xml_root.set('extends', yml_appdef['extends']) + del yml_appdef['extends'] + else: + xml_root.set('extends', 'NXobject') + if yml_appdef['category'] == 'application': xml_root.set('category', 'application') - del yml_appdef['category'] else: xml_root.set('category', 'base') - del yml_appdef['category'] + del yml_appdef['category'] if 'symbols' in yml_appdef.keys(): - yaml2nxdl_forward_tools.xml_handle_symbols(xml_root, yml_appdef['symbols']) + yaml2nxdl_forward_tools.xml_handle_symbols(yml_appdef, + xml_root, + 'symbols', + yml_appdef['symbols']) del yml_appdef['symbols'] assert isinstance(yml_appdef['doc'], str) and yml_appdef['doc'] != '', 'Doc \ @@ -119,9 +127,14 @@ def yaml2nxdl(input_file: str, verbose: bool): del yml_appdef['doc'] - assert len(yml_appdef.keys()) == 1, 'Accepting at most keywords: category, \ + root_keys = 0 + for key in yml_appdef.keys(): + if '__line__' not in key: + root_keys += 1 + + assert root_keys == 1, 'Accepting at most keywords: category, \ doc, symbols, and NX... at root-level!' - keyword = list(yml_appdef.keys())[0] # which is the only one + keyword = list(yml_appdef.keys())[0] assert (keyword[0:3] == '(NX' and keyword[-1:] == ')' and len(keyword) > 4), 'NX \ keyword has an invalid pattern, or is too short!' xml_root.set('name', keyword[1:-1]) diff --git a/nexusparser/tools/yaml2nxdl/yaml2nxdl_forward_tools.py b/nexusparser/tools/yaml2nxdl/yaml2nxdl_forward_tools.py index 4d9166be7..c98567443 100644 --- a/nexusparser/tools/yaml2nxdl/yaml2nxdl_forward_tools.py +++ b/nexusparser/tools/yaml2nxdl/yaml2nxdl_forward_tools.py @@ -26,6 +26,12 @@ import textwrap import yaml +from yaml.composer import Composer +from yaml.constructor import Constructor + +from yaml.nodes import ScalarNode +from yaml.resolver import BaseResolver +from yaml.loader import Loader from nexusparser.tools import nexus @@ -35,16 +41,55 @@ NX_TYPE_KEYS = nexus.get_nx_attribute_type() NX_ATTR_IDNT = '\\@' NX_UNIT_IDNT = 'unit' -NX_UNIT_TYPS = nexus.get_nx_units() +NX_UNIT_TYPES = nexus.get_nx_units() + + +class LineLoader(Loader): # pylint: disable=too-many-ancestors + """ + LineLoader parses a yaml into a python dictionary extended with extra items. 
+ The new items have as keys __line__ and as values the yaml file line number + """ + + def compose_node(self, parent, index): + # the line number where the previous token has ended (plus empty lines) + node = Composer.compose_node(self, parent, index) + node.__line__ = self.line + 1 + return node + + def construct_mapping(self, node, deep=False): + node_pair_lst = node.value + node_pair_lst_for_appending = [] + + for key_node in node_pair_lst: + shadow_key_node = ScalarNode( + tag=BaseResolver.DEFAULT_SCALAR_TAG, value='__line__' + key_node[0].value) + shadow_value_node = ScalarNode( + tag=BaseResolver.DEFAULT_SCALAR_TAG, value=key_node[0].__line__) + node_pair_lst_for_appending.append( + (shadow_key_node, shadow_value_node)) + + node.value = node_pair_lst + node_pair_lst_for_appending + return Constructor.construct_mapping(self, node, deep=deep) def yml_reader(inputfile): """ - Yaml module based reading of .yml file + This function launches the LineLoader class. + It parses the yaml in a dict and extends it with line tag keys for each key of the dict. + """ + + plain_text_yaml = open(inputfile, "r").read() + loader = LineLoader(plain_text_yaml) + return loader.get_single_data() + + +def yml_reader_nolinetag(inputfile): + """ + pyyaml based parsing of yaml file in python dict """ with open(inputfile, 'r') as stream: parsed_yaml = yaml.safe_load(stream) - return parsed_yaml + return parsed_yaml def nx_name_type_resolving(tmp): @@ -103,11 +148,13 @@ def xml_handle_units(obj, value): obj.set('units', value) -def xml_handle_exists(obj, value): +def xml_handle_exists(dct, obj, keyword, value): """This function creates an 'exists' element instance, and appends it to an existing element """ - assert value is not None, 'xml_handle_exists, value must not be None!' + + line_number = f'__line__{keyword}' + assert value is not None, f'Line {dct[line_number]}: exists argument must not be None !' if isinstance(value, list): if len(value) == 2 and value[0] == 'min': obj.set('minOccurs', str(value[1])) @@ -120,13 +167,15 @@ def xml_handle_exists(obj, value): else: obj.set('maxOccurs', 'unbounded') elif len(value) == 4 and (value[0] != 'min' or value[2] != 'max'): - raise ValueError('exists keyword needs to go either with an optional \ -[recommended] list with two entries either [min, ] or \ -[max, ], or a list of four entries [min, , max, ] !') + raise ValueError(f'Line {dct[line_number]}: exists keyword' + f'needs to go either with an optional [recommended] list with two' + f'entries either [min, ] or [max, ], or a list of four' + f'entries [min, , max, ] !') else: - raise ValueError('exists keyword needs to go either with optional, \ -recommended, a list with two entries either [min, ] or \ -[max, ], or a list of four entries [min, , max, ] !') + raise ValueError(f'Line {dct[line_number]}: exists keyword' + f'needs to go either with optional, recommended, a list with two' + f'entries either [min, ] or [max, ], or a list of four' + f'entries [min, , max, ] !') else: if value == 'optional': obj.set('optional', 'true') @@ -150,28 +199,30 @@ def xml_handle_group(verbose, obj, value, keyword_name, keyword_type): recursive_build(grp, value, verbose) -def xml_handle_dimensions(obj, value: dict): +def xml_handle_dimensions(dct, obj, keyword, value: dict): """This function creates a 'dimensions' element instance, and appends it to an existing element """ - assert 'dim' in value.keys(), 'xml_handle_dimensions \ -rank and/or dim not keys in value dict!' 
+ line_number = f'__line__{keyword}' + assert 'dim' in value.keys(), f'Line {dct[line_number]}: dim is not a key in dimensions dict !' dims = ET.SubElement(obj, 'dimensions') if 'rank' in value.keys(): dims.set('rank', str(value['rank'])) for element in value['dim']: - assert isinstance(element, list), 'xml_handle_dimensions, element is not a list!' - assert len(element) >= 2, 'xml_handle_dimensions, list element has less than two entries!' + line_number = f'__line__dim' + assert isinstance(element, list), f'Line {value[line_number]}: dim argument not a list !' + assert len( + element) >= 2, f'Line {value[line_number]}: dim list has less than two entries !' dim = ET.SubElement(dims, 'dim') dim.set('index', str(element[0])) dim.set('value', str(element[1])) if len(element) == 3: - assert element[2] == 'optional', 'xml_handle_dimensions element is \ -a list with unexpected number of entries!' + assert element[2] == 'optional', f'Line {value[line_number]}: dim argument \ +is a list with unexpected number of entries!' dim.set('required', 'false') -def xml_handle_enumeration(obj, value, verbose): +def xml_handle_enumeration(dct, obj, keyword, value, verbose): """This function creates an 'enumeration' element instance. Two cases are handled: @@ -180,60 +231,80 @@ def xml_handle_enumeration(obj, value, verbose): """ enum = ET.SubElement(obj, 'enumeration') - assert len(value) >= 1, 'xml_handle_enumeration, value must not be an empty list!' + line_number = f'__line__{keyword}' + assert value is not None, f'Line {dct[line_number]}: enumeration must \ +bear at least an argument !' + assert len( + value) >= 1, f'Line {dct[line_number]}: enumeration must not be an empty list!' if isinstance(value, list): for element in value: itm = ET.SubElement(enum, 'item') itm.set('value', str(element)) if isinstance(value, dict) and value != {}: for element in value.keys(): - itm = ET.SubElement(enum, 'item') - itm.set('value', str(element)) - recursive_build(itm, value[str(element)], verbose) + if '__line__' not in element: + itm = ET.SubElement(enum, 'item') + itm.set('value', str(element)) + recursive_build(itm, value[str(element)], verbose) -def xml_handle_link(obj, keyword, value): +def xml_handle_link(dct, obj, keyword, value): """If we have an NXDL link we decode the name attribute from (link)[:-6] """ - if len(keyword[:-6]) >= 1 and isinstance(value, dict) and 'target' in value.keys(): - if isinstance(value['target'], str) and len(value['target']) >= 1: - lnk = ET.SubElement(obj, 'link') - lnk.set('name', keyword[:-6]) - lnk.set('target', value['target']) + if '__line__' not in keyword: + if len(keyword[:-6]) >= 1 and \ + isinstance(value, dict) and \ + 'target' in value.keys(): + if isinstance(value['target'], str) and len(value['target']) >= 1: + lnk = ET.SubElement(obj, 'link') + lnk.set('name', keyword[:-6]) + lnk.set('target', value['target']) + else: + line_number = '__line__target' + raise ValueError( + keyword + f'Line {value[line_number]}: target argument of link is invalid !') else: - raise ValueError(keyword + ' value for target member of a link is invalid !') + line_number = f'__line__{keyword}' + raise ValueError( + keyword + f'Line {dct[line_number]}: the link formatting is invalid !') else: - raise ValueError(keyword + ' the formatting of what seems to be a link \ -is invalid in the yml file !') + pass -def xml_handle_symbols(obj, value: dict): +def xml_handle_symbols(dct, obj, keyword, value: dict): """Handle a set of NXDL symbols as a child to obj """ - assert len(list(value.keys())) >= 1, 
'xml_handle_symbols, symbols tables must not be empty!' + line_number = f'__line__{keyword}' + assert len(list(value.keys()) + ) >= 1, f'Line {dct[line_number]}: symbols table must not be empty !' syms = ET.SubElement(obj, 'symbols') if 'doc' in value.keys(): doctag = ET.SubElement(syms, 'doc') doctag.text = '\n' + textwrap.fill(value['doc'], width=70) + '\n' for kkeyword, vvalue in value.items(): - if kkeyword != 'doc': - assert vvalue is not None and isinstance(vvalue, str), 'Put a comment in doc string!' + if kkeyword != 'doc' and '__line__' not in kkeyword: + line_number = f'__line__{kkeyword}' + assert vvalue is not None and isinstance( + vvalue, str), f'Line {value[line_number]}: put a comment in doc string !' sym = ET.SubElement(syms, 'symbol') sym.set('name', kkeyword) sym_doc = ET.SubElement(sym, 'doc') sym_doc.text = '\n' + textwrap.fill(vvalue, width=70) + '\n' -def check_keyword_variable(verbose, keyword_name, keyword_type, value): +def check_keyword_variable(verbose, dct, keyword, value): """Check whether both keyword_name and keyword_type are empty, and complains if it is the case """ + keyword_name, keyword_type = nx_name_type_resolving(keyword) if verbose: - sys.stdout.write(f'{keyword_name}({keyword_type}): value type is {type(value)}\n') + sys.stdout.write( + f'{keyword_name}({keyword_type}): value type is {type(value)}\n') if keyword_name == '' and keyword_type == '': - raise ValueError('Found an improper YML key !') + line_number = f'__line__{keyword}' + raise ValueError(f'Line {dct[line_number]}: found an improper yaml key !') def helper_keyword_type(kkeyword_type): @@ -253,12 +324,12 @@ def verbose_flag(verbose, keyword, value): sys.stdout.write(f' key:{keyword}; value type is {type(value)}\n') -def second_nested_level_handle(verbose, fld, value): +def second_nested_level_handle(verbose, dct, fld): """When a second dictionary is found inside a value, a new cycle of handlings is run """ - if isinstance(value, dict): - for kkeyword, vvalue in iter(value.items()): + if isinstance(dct, dict): + for kkeyword, vvalue in iter(dct.items()): verbose_flag(verbose, kkeyword, vvalue) if kkeyword[0:2] == NX_ATTR_IDNT: attr = ET.SubElement(fld, 'attribute') @@ -271,48 +342,56 @@ def second_nested_level_handle(verbose, fld, value): typ = helper_keyword_type(kkeyword_type) or 'NX_CHAR' attr.set('type', typ) if isinstance(vvalue, dict): - for kkkeyword, vvvalue in iter(vvalue.items()): - third_nested_level_handle(verbose, attr, kkeyword, kkkeyword, vvvalue) + third_nested_level_handle(verbose, attr, vvalue) elif kkeyword == 'doc': xml_handle_doc(fld, vvalue) elif kkeyword == NX_UNIT_IDNT: xml_handle_units(fld, vvalue) elif kkeyword == 'exists': - xml_handle_exists(fld, vvalue) + xml_handle_exists(dct, fld, kkeyword, vvalue) elif kkeyword == 'dimensions': - xml_handle_dimensions(fld, vvalue) + xml_handle_dimensions(dct, fld, kkeyword, vvalue) elif kkeyword == 'enumeration': - xml_handle_enumeration(fld, vvalue, verbose) + xml_handle_enumeration(dct, fld, kkeyword, vvalue, verbose) elif kkeyword == 'link': fld.set('link', '') + elif '__line__' in kkeyword: + pass else: - raise ValueError(kkeyword, ' faced unknown situation !') + line_number = f'__line__{kkeyword}' + raise ValueError( + kkeyword, f' Line {dct[line_number]}: faced unknown situation !') -def third_nested_level_handle(verbose, attr, kkeyword, kkkeyword, vvvalue): +def third_nested_level_handle(verbose, attr, vvalue_dct): """When a third dictionary is found inside a value, a new cycle of handlings is run """ - 
verbose_flag(verbose, kkkeyword, vvvalue) - if kkkeyword == 'doc': - xml_handle_doc(attr, vvvalue) - elif kkkeyword == 'exists': - xml_handle_exists(attr, vvvalue) - elif kkkeyword == 'enumeration': - xml_handle_enumeration(attr, vvvalue, verbose) - else: - raise ValueError( - kkeyword, kkkeyword, ' attribute handling !') + for kkkeyword, vvvalue in iter(vvalue_dct.items()): + verbose_flag(verbose, kkkeyword, vvvalue) + if kkkeyword == 'doc': + xml_handle_doc(attr, vvvalue) + elif kkkeyword == 'exists': + xml_handle_exists(vvalue_dct, attr, kkkeyword, vvvalue) + elif kkkeyword == 'enumeration': + xml_handle_enumeration(vvalue_dct, attr, kkkeyword, vvvalue, verbose) + elif '__line__' in kkkeyword: + pass + else: + line_number = f'__line__{kkkeyword}' + raise ValueError( + kkkeyword, f' Line {vvalue_dct[line_number]}: attribute handling error !') -def attribute_attributes_handle(verbose, obj, value, keyword_name): +def attribute_attributes_handle(verbose, dct, obj, value, keyword): """Handle the attributes found connected to attribute field""" # as an attribute identifier + keyword_name = nx_name_type_resolving(keyword) + line_number = f'__line__{keyword}' attr = ET.SubElement(obj, 'attribute') - attr.set('name', keyword_name[2:]) + attr.set('name', keyword_name[0][2:]) if value is not None: - assert isinstance(value, dict), 'the keyword is an attribute, \ -its value must be a dict!' + assert isinstance(value, dict), f'Line {dct[line_number]}: the attribute must be a dict!' for kkeyword, vvalue in iter(value.items()): verbose_flag(verbose, kkeyword, vvalue) if kkeyword == 'name': @@ -322,17 +401,20 @@ def attribute_attributes_handle(verbose, obj, value, keyword_name): elif kkeyword == 'type': attr.set('type', vvalue.upper()) elif kkeyword == 'enumeration': - xml_handle_enumeration(attr, vvalue, verbose) + xml_handle_enumeration(value, attr, kkeyword, vvalue, verbose) elif kkeyword == 'exists': - xml_handle_exists(attr, vvalue) + xml_handle_exists(value, attr, kkeyword, vvalue) + elif '__line__' in kkeyword: + pass else: - raise ValueError(kkeyword + ' facing an unknown situation \ -while processing attributes of an attribute ! 
-while processing attributes of an attribute ! Node tag:', obj.tag, 'Node content:', obj.attrib)
+                line_number = f'__line__{kkeyword}'
+                raise ValueError(kkeyword + f'Line {value[line_number]}: facing an unknown \
+situation while processing attributes of an attribute !')
 
 
 # handle special keywords (symbols),
 # assumed that you do not encounter further symbols nested inside
-def second_level_attributes_handle(fld, keyword, value):
+def second_level_attributes_handle(dct, fld, keyword, value):
     """If value is not a dictionary, this function handles the attributes of a nested field
 
     """
@@ -342,13 +424,15 @@ def second_level_attributes_handle(fld, keyword, value):
         xml_handle_doc(fld, value)
     elif keyword == NX_UNIT_IDNT:
         xml_handle_units(fld, value)
     elif keyword[0:2] == NX_ATTR_IDNT:  # attribute of a field
-        raise ValueError(keyword, ' unknown attribute \
-            of a field case coming from no dict !')
+        line_number = f'__line__{keyword}'
+        raise ValueError(keyword, f' unknown attribute \
+            of a field case at line {dct[line_number]} !')
     elif keyword == 'exists':
-        xml_handle_exists(fld, value)
+        xml_handle_exists(dct, fld, keyword, value)
     elif keyword == 'dimensions':
-        raise ValueError(keyword, ' unknown dimensions \
-            of a field case coming from no dict !')
+        line_number = f'__line__{keyword}'
+        raise ValueError(keyword, f' Line {dct[line_number]}: unknown dimensions \
+            of a field case !')
     else:
         pass
@@ -387,38 +471,39 @@
     """
     for keyword, value in iter(dct.items()):
         keyword_name, keyword_type = nx_name_type_resolving(keyword)
-        check_keyword_variable(verbose, keyword_name, keyword_type, value)
+        check_keyword_variable(verbose, dct, keyword, value)
         if verbose:
-            sys.stdout.write(f'keyword_name:{keyword_name} keyword_type {keyword_type}\n')
+            sys.stdout.write(
+                f'keyword_name:{keyword_name} keyword_type {keyword_type}\n')
 
         if keyword[-6:] == '(link)':
-            xml_handle_link(obj, keyword, value)
+            xml_handle_link(dct, obj, keyword, value)
 
         elif keyword_type == '' and keyword_name == 'symbols':
             # print(value.key(), type(value.key()), value.value(), type(value.value()))
-            xml_handle_symbols(obj, value)
+            xml_handle_symbols(dct, obj, keyword, value)
 
-        elif (keyword_type in NX_CLSS) or \
-                (keyword_type not in NX_TYPE_KEYS + [''] + NX_NEW_DEFINED_CLASSES):
+        elif ((keyword_type in NX_CLSS) or (keyword_type not in
+               NX_TYPE_KEYS + [''] + NX_NEW_DEFINED_CLASSES)) \
+                and '__line__' not in keyword_name:
             # we can be sure we need to instantiate a new group
             xml_handle_group(verbose, obj, value, keyword_name, keyword_type)
 
         elif keyword_name[0:2] == NX_ATTR_IDNT:  # check if obj qualifies
-            attribute_attributes_handle(verbose, obj, value, keyword_name)
+            attribute_attributes_handle(verbose, dct, obj, value, keyword)
         elif keyword == 'doc':
             xml_handle_doc(obj, value)
         elif keyword == 'enumeration':
-            xml_handle_enumeration(obj, value, verbose)
+            xml_handle_enumeration(dct, obj, keyword, value, verbose)
        elif keyword == 'dimensions':
-            xml_handle_dimensions(obj, value)
+            xml_handle_dimensions(dct, obj, keyword, value)
        elif keyword == 'exists':
-            xml_handle_exists(obj, value)
+            xml_handle_exists(dct, obj, keyword, value)
-        elif keyword_name != '':
-            fld = not_empty_keyword_name_handle(obj, keyword_type, keyword_name)
-            second_nested_level_handle(verbose, fld, value)
-            second_level_attributes_handle(fld, keyword, value)
-        else:
-            pass
+        elif keyword_name != '' and '__line__' not in keyword_name:
+            fld = not_empty_keyword_name_handle(
+                obj, keyword_type, keyword_name)
+            second_nested_level_handle(verbose, value, fld)
+            second_level_attributes_handle(dct, fld, keyword, value)
diff --git a/tests/data/tools/yaml2nxdl_test_data/NXattributes.yml b/tests/data/tools/yaml2nxdl_test_data/NXattributes.yml
index 9e5061bc9..1d6dbd7e3 100644
--- a/tests/data/tools/yaml2nxdl_test_data/NXattributes.yml
+++ b/tests/data/tools/yaml2nxdl_test_data/NXattributes.yml
@@ -4,13 +4,11 @@ symbols:
   doc: documentation no. 1
   testnamesymbol: test description of symbol
 category: application
+extends: my_test_extends
 (NXellipsometry_base_draft):
   (NXentry):
     \@entry:
       doc: attribute documentation
-      # ATTRIBUTE DOCUMENTATION
-      exists: required
-      #if the exists keyword is not used the default is exists optional
     doc: documentation no. 2
     experiment_identifier:
       exists: required
diff --git a/tests/data/tools/yaml2nxdl_test_data/NXfilelineError1.yml b/tests/data/tools/yaml2nxdl_test_data/NXfilelineError1.yml
new file mode 100644
index 000000000..b75af37ab
--- /dev/null
+++ b/tests/data/tools/yaml2nxdl_test_data/NXfilelineError1.yml
@@ -0,0 +1,31 @@
+#test case for attributes
+doc: documentation no. 0
+symbols:
+  doc: documentation no. 1
+  testnamesymbol: test description of symbol
+category: application
+extends: my_test_extend
+(NXellipsometry_base_draft):
+  (NXentry):
+    \@entry:
+      doc: attribute documentation
+    doc: documentation no. 2
+    experiment_identifier:
+      existsss: required
+      doc: documentation no. 3
+    experiment_description:
+      exists: required
+    start_time(NX_DATE_TIME):
+      exists: required
+      unit: NX_TIME
+    program_name:
+      doc: documentation no. 4
+    program_version:
+      doc: documentation no. 5
+    time_zone(NX_DATE_TIME):
+      exists: required
+      doc: documentation no. 6
+    definition_local:
+      doc: documentation no. 7
+      \@version:
+        # EMPTY ATTRIBUTES
diff --git a/tests/data/tools/yaml2nxdl_test_data/NXfilelineError2.yml b/tests/data/tools/yaml2nxdl_test_data/NXfilelineError2.yml
new file mode 100644
index 000000000..18b0d0b78
--- /dev/null
+++ b/tests/data/tools/yaml2nxdl_test_data/NXfilelineError2.yml
@@ -0,0 +1,31 @@
+#test case for attributes
+doc: documentation no. 0
+symbols:
+  doc: documentation no. 1
+  testnamesymbol: test description of symbol
+category: application
+extends: my_test_extend
+(NXellipsometry_base_draft):
+  (NXentry):
+    \@entry:
+      doc: attribute documentation
+    doc: documentation no. 2
+    experiment_identifier:
+      exists: required
+      doc: documentation no. 3
+    experiment_description:
+      exists: required
+    start_time(NX_DATE_TIME):
+      exists: required
+      unit: NX_TIME
+    program_name:
+      dochy: documentation no. 4
+    program_version:
+      doc: documentation no. 5
+    time_zone(NX_DATE_TIME):
+      exists: required
+      doc: documentation no. 6
+    definition_local:
+      doc: documentation no. 7
+      \@version:
+        # EMPTY ATTRIBUTES
diff --git a/tests/data/tools/yaml2nxdl_test_data/NXfilelineError3.yml b/tests/data/tools/yaml2nxdl_test_data/NXfilelineError3.yml
new file mode 100644
index 000000000..5cf9619a0
--- /dev/null
+++ b/tests/data/tools/yaml2nxdl_test_data/NXfilelineError3.yml
@@ -0,0 +1,31 @@
+#test case for attributes
+doc: documentation no. 0
+symbols:
+  doc: documentation no. 1
+  testnamesymbol: test description of symbol
+category: application
+extends: my_test_extend
+(NXellipsometry_base_draft):
+  (NXentry):
+    \@entry:
+      doc: attribute documentation
+    doc: documentation no. 2
+    experiment_identifier:
+      exists: required
+      doc: documentation no. 3
+    experiment_description:
+      exists: required
+    start_time(NX_DATE_TIME):
+      exists: required
+      unit: NX_TIME
+    program_name:
+      doc: documentation no. 4
+    program_version:
+      doc: documentation no. 5
+    time_zone(NX_DATE_TIME):
+      exists:
+      doc: documentation no. 6
+    definition_local:
+      doc: documentation no. 7
+      \@version:
+        # EMPTY ATTRIBUTES
diff --git a/tests/data/tools/yaml2nxdl_test_data/Ref_NXattributes.nxdl.xml b/tests/data/tools/yaml2nxdl_test_data/Ref_NXattributes.nxdl.xml
index 9bd38796b..4f54dd086 100644
--- a/tests/data/tools/yaml2nxdl_test_data/Ref_NXattributes.nxdl.xml
+++ b/tests/data/tools/yaml2nxdl_test_data/Ref_NXattributes.nxdl.xml
@@ -1,6 +1,6 @@
-
+
         documentation no. 1
@@ -14,7 +14,7 @@
     documentation no. 0
-
+
         attribute documentation
@@ -23,13 +23,13 @@
         documentation no. 2
-
+
         documentation no. 3
-
-
+
+
         documentation no. 4
@@ -40,7 +40,7 @@
         documentation no. 5
-
+
         documentation no. 6
diff --git a/tests/tools/yaml2nxdl/test_yml2nxdl.py b/tests/tools/yaml2nxdl/test_yml2nxdl.py
index a3f1c9bc1..7fefc87e6 100755
--- a/tests/tools/yaml2nxdl/test_yml2nxdl.py
+++ b/tests/tools/yaml2nxdl/test_yml2nxdl.py
@@ -98,7 +98,7 @@ def compare_matches(ref_xml_file, test_yml_file, test_xml_file, desired_matches)
 
 
 def test_links():
-    """First test: check the correct parsing of links
+    """Check the correct parsing of links
 
 """
     ref_xml_link_file = 'tests/data/tools/yaml2nxdl_test_data/Ref_NXtest_links.nxdl.xml'
@@ -151,8 +151,30 @@ def test_nxdl2yaml_doc_format():
     sys.stdout.write('Test on xml -> yml doc formatting okay.\n')
 
 
+def test_fileline_error():
+    """Check that the yaml file line number is reported in the error message.
+
+    """
+    test_yml_file = 'tests/data/tools/yaml2nxdl_test_data/NXfilelineError1.yml'
+    result = CliRunner().invoke(yml2nxdl.launch_tool, ['--input-file', test_yml_file])
+    assert result.exit_code == 1
+    assert '14' in str(result.exception)
+
+    test_yml_file = 'tests/data/tools/yaml2nxdl_test_data/NXfilelineError2.yml'
+    result = CliRunner().invoke(yml2nxdl.launch_tool, ['--input-file', test_yml_file])
+    assert result.exit_code == 1
+    assert '22' in str(result.exception)
+
+    test_yml_file = 'tests/data/tools/yaml2nxdl_test_data/NXfilelineError3.yml'
+    result = CliRunner().invoke(yml2nxdl.launch_tool, ['--input-file', test_yml_file])
+    assert result.exit_code == 1
+    assert '26' in str(result.exception)
+
+    sys.stdout.write('Test on yml -> nxdl fileline error handling okay.\n')
+
+
 def test_symbols():
-    """Second test: check the correct parsing of symbols
+    """Check the correct parsing of symbols
 
 """
     ref_xml_symbol_file = 'tests/data/tools/yaml2nxdl_test_data/Ref_NXnested_symbols.nxdl.xml'
@@ -169,7 +191,7 @@ def test_symbols():
 
 
 def test_attributes():
-    """Third test: check the correct handling of empty attributes
+    """Check the correct handling of empty attributes
 or attributes fields, e.g. doc
 
 """
@@ -186,8 +208,25 @@ def test_attributes():
     sys.stdout.write('Test on attributes okay.\n')
 
 
+def test_extends():
+    """Check the correct handling of extends keyword
+
+"""
+    ref_xml_attribute_file = 'tests/data/tools/yaml2nxdl_test_data/Ref_NXattributes.nxdl.xml'
+    test_yml_attribute_file = 'tests/data/tools/yaml2nxdl_test_data/NXattributes.yml'
+    test_xml_attribute_file = 'tests/data/tools/yaml2nxdl_test_data/NXattributes.nxdl.xml'
+    desired_matches = ['extends="my_test_extends"']
+    compare_matches(
+        ref_xml_attribute_file,
+        test_yml_attribute_file,
+        test_xml_attribute_file,
+        desired_matches)
+    os.remove('tests/data/tools/yaml2nxdl_test_data/NXattributes.nxdl.xml')
+    sys.stdout.write('Test on extends keyword okay.\n')
+
+
 def test_symbols_and_enum_docs():
-    """Third test: check the correct handling of empty attributes
+    """Check the correct handling of empty attributes
 or attributes fields, e.g. doc
 
 """