diff --git a/FAIRFlowChemistry/core/calibration.py b/FAIRFlowChemistry/core/calibration.py index fd97b53..95fb939 100644 --- a/FAIRFlowChemistry/core/calibration.py +++ b/FAIRFlowChemistry/core/calibration.py @@ -1,4 +1,5 @@ import sdRDM +import numpy as np from typing import Dict, List, Optional from pydantic import PrivateAttr, model_validator @@ -67,3 +68,25 @@ def _parse_raw_xml_data(self): elif isinstance(value, _Element): self._raw_xml_data[attr] = elem2dict(value) return self + + def calibrate(self): + """ + Fit the polynomial regression model (peak areas -> concentrations) to the calibration data + """ + + self.regression_coefficients = np.polynomial.polynomial.polyfit( + self.peak_areas.values, self.concentrations.values, self.degree + ).tolist() + + def predict(self, x: list) -> np.ndarray: + """ + Predict with the fitted regression model + + Args: + x (1D list): New locations for which predictions should be made + + Returns: + (1D numpy array): Predicted data at new locations + """ + + return np.polynomial.Polynomial(self.regression_coefficients)(np.array(x)) \ No newline at end of file diff --git a/FAIRFlowChemistry/core/experiment.py b/FAIRFlowChemistry/core/experiment.py index 94200ee..072fefa 100644 --- a/FAIRFlowChemistry/core/experiment.py +++ b/FAIRFlowChemistry/core/experiment.py @@ -1,4 +1,5 @@ import sdRDM +import pandas as pd from typing import Dict, List, Optional from pydantic import PrivateAttr, model_validator @@ -16,7 +17,8 @@ from .plantsetup import PlantSetup from .metadata import Metadata from .calibration import Calibration - +from .quantity import Quantity +from .datatype import DataType @forge_signature class Experiment(sdRDM.DataModel): @@ -133,3 +135,37 @@ def add_to_species_data( params["id"] = id self.species_data.append(SpeciesData(**params)) return self.species_data[-1] + + @property + def volumetric_flow_time_course(self) -> list: + """Extract the timestamps of the volumetric flow rate as well as the flow rate values themselves from the experiment + + Returns: + list: Datetime list and flow 
value list + """ + volumetric_flow_datetime_list = [] + volumetric_flow_values_list = [] + + mfm_measurements = self.get( + "measurements", "measurement_type", "MFM measurement" + )[0] + for mfm_measurement in mfm_measurements: + volumetric_flow_datetime_list.extend( + mfm_measurement.get( + "experimental_data", "quantity", Quantity.DATETIME.value + )[0][0].values + ) + volumetric_flow_values_list.extend( + mfm_measurement.get( + "experimental_data", "quantity", Quantity.VOLUMETRICFLOWRATE.value + )[0][0].values + ) + + # Timestamps read directly from the experiment already have the correct type; timestamps loaded from a JSON dataset are strings and need to be converted + if not type(volumetric_flow_datetime_list[0]) == DataType.DATETIME.value: + volumetric_flow_datetime_list = [ + pd.to_datetime(timestamp, format="%Y-%m-%dT%H:%M:%S").to_pydatetime() + for timestamp in volumetric_flow_datetime_list + ] + + return [volumetric_flow_datetime_list, volumetric_flow_values_list] \ No newline at end of file diff --git a/FAIRFlowChemistry/tools/reader.py b/FAIRFlowChemistry/tools/reader.py index 3913dc6..7d377cc 100644 --- a/FAIRFlowChemistry/tools/reader.py +++ b/FAIRFlowChemistry/tools/reader.py @@ -14,6 +14,8 @@ from FAIRFlowChemistry.core import ComponentType + + def gc_parser(metadata_path: Path, experimental_data_path: Path): """ Function that reads in a file from a gas chromotography. 
Important information that is extracted is the diff --git a/Main.ipynb b/Main.ipynb index 94b705c..c09a26e 100644 --- a/Main.ipynb +++ b/Main.ipynb @@ -139,111 +139,196 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "YAML Output:\n", - "Carbon dioxide:\n", - " chemical_formula: CO2\n", - " concentrations:\n", - " - 0\n", - " - 50\n", - " peak_areas:\n", - " - 0\n", - " - 38653\n", - "Carbon monoxide:\n", - " chemical_formula: CO\n", - " concentrations:\n", - " - 0.5\n", - " - 1\n", - " - 5\n", - " peak_areas:\n", - " - 797\n", - " - 1328\n", - " - 7223\n", - "Ethane:\n", - " chemical_formula: C2H6\n", - " concentrations:\n", - " - 0\n", - " - 5\n", - " peak_areas:\n", - " - 0\n", - " - 12168\n", - "Ethene:\n", - " chemical_formula: C2H4\n", - " concentrations:\n", - " - 0.5\n", - " - 2\n", - " - 3\n", - " peak_areas:\n", - " - 1122\n", - " - 4864\n", - " - 7297\n", - "Hydrogen:\n", - " chemical_formula: H2\n", - " concentrations:\n", - " - 5\n", - " - 10\n", - " - 20\n", - " peak_areas:\n", - " - 71\n", - " - 153\n", - " - 330\n", - "Methane:\n", - " chemical_formula: CH4\n", - " concentrations:\n", - " - 5\n", - " - 10\n", - " peak_areas:\n", - " - 5727\n", - " - 11991\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "## Change initalizing species data!" 
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'Hydrogen': {'chemical_formula': 'H2',\n", - " 'peak_areas': [71, 153, 330],\n", - " 'concentrations': [5, 10, 20]},\n", + "{'Carbon dioxide': {'chemical_formula': 'CO2',\n", + " 'calibration': {'concentrations': [0, 50], 'peak_areas': [0, 38653]},\n", + " 'correction_factor': 0.74,\n", + " 'electron_transfer': 2},\n", " 'Carbon monoxide': {'chemical_formula': 'CO',\n", - " 'peak_areas': [797, 1328, 7223],\n", - " 'concentrations': [0.5, 1, 5]},\n", - " 'Carbon dioxide': {'chemical_formula': 'CO2',\n", - " 'peak_areas': [0, 38653],\n", - " 'concentrations': [0, 50]},\n", - " 'Methane': {'chemical_formula': 'CH4',\n", - " 'peak_areas': [5727, 11991],\n", - " 'concentrations': [5, 10]},\n", - " 'Ethene': {'chemical_formula': 'C2H4',\n", - " 'peak_areas': [1122, 4864, 7297],\n", - " 'concentrations': [0.5, 2, 3]},\n", + " 'calibration': {'concentrations': [0.5, 1, 5],\n", + " 'peak_areas': [797, 1328, 7223]},\n", + " 'correction_factor': 1.0,\n", + " 'electron_transfer': 2},\n", + " 'Hydrogen': {'chemical_formula': 'H2',\n", + " 'calibration': {'concentrations': [5, 10, 20], 'peak_areas': [71, 153, 330]},\n", + " 'correction_factor': 1.01,\n", + " 'electron_transfer': 2},\n", " 'Ethane': {'chemical_formula': 'C2H6',\n", - " 'peak_areas': [0, 12168],\n", - " 'concentrations': [0, 5]}}" + " 'calibration': {'concentrations': [0, 5], 'peak_areas': [0, 12168]},\n", + " 'correction_factor': None,\n", + " 'electron_transfer': 16},\n", + " 'Ethene': {'chemical_formula': 'C2H4',\n", + " 'calibration': {'concentrations': [0.5, 2, 3],\n", + " 'peak_areas': [1122, 4864, 7297]},\n", + " 'correction_factor': None,\n", + " 'electron_transfer': 12},\n", + " 'Methane': {'chemical_formula': 'CH4',\n", + " 'calibration': {'concentrations': [5, 10], 'peak_areas': [5727, 11991]},\n", + " 'correction_factor': 0.76,\n", + " 'electron_transfer': 8}}" ] }, - 
"execution_count": 9, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "json_data" + "import yaml\n", + "from FAIRFlowChemistry.core import Calibration, Data, Quantity, Experiment\n", + "\n", + "\n", + "with open(\"data/additional_data/species_data.yaml\") as f:\n", + " species_data = yaml.safe_load(f)\n", + "species_data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "test = Experiment()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[4mCalibration\u001b[0m\n", + "├── \u001b[94mid\u001b[0m = bcef0a33-0954-40e8-84ce-812c2aa34286\n", + "├── \u001b[94mpeak_areas\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = 6dbccfbd-2c00-478e-89d9-f17a37cdad92\n", + "│ ├── \u001b[94mquantity\u001b[0m = Peak area\n", + "│ └── \u001b[94mvalues\u001b[0m = [0.0, 38653.0, ...]\n", + "├── \u001b[94mconcentrations\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = 7824671b-6ad3-48e5-a08e-75fda85486bd\n", + "│ ├── \u001b[94mquantity\u001b[0m = Concentration\n", + "│ └── \u001b[94mvalues\u001b[0m = [0.0, 50.0, ...]\n", + "├── \u001b[94mregression_coefficients\u001b[0m = [-4.466559424312742e-15, 0.0012935606550591155, ...]\n", + "└── \u001b[94mdegree\u001b[0m = 1\n", + "\n", + "\u001b[4mCalibration\u001b[0m\n", + "├── \u001b[94mid\u001b[0m = 66c51884-1dae-451e-b103-e6825139a3d4\n", + "├── \u001b[94mpeak_areas\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = 23ce97b3-17cc-461d-9611-007fb871847e\n", + "│ ├── \u001b[94mquantity\u001b[0m = Peak area\n", + "│ └── \u001b[94mvalues\u001b[0m = [797.0, 1328.0, 7223.0, ...]\n", + "├── \u001b[94mconcentrations\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = aac491a0-c4f5-4089-bb3b-e6da4fa87d5b\n", + 
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n", + "│ └── \u001b[94mvalues\u001b[0m = [0.5, 1.0, 5.0, ...]\n", + "├── \u001b[94mregression_coefficients\u001b[0m = [0.012656829292048657, 0.0006912740171292097, ...]\n", + "└── \u001b[94mdegree\u001b[0m = 1\n", + "\n", + "\u001b[4mCalibration\u001b[0m\n", + "├── \u001b[94mid\u001b[0m = abcb5f13-bdc7-4184-81e3-72f43d209ba1\n", + "├── \u001b[94mpeak_areas\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = cf425f61-3655-40af-a425-e6b1d50b70df\n", + "│ ├── \u001b[94mquantity\u001b[0m = Peak area\n", + "│ └── \u001b[94mvalues\u001b[0m = [71.0, 153.0, 330.0, ...]\n", + "├── \u001b[94mconcentrations\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = 0531cb42-b9e1-4065-a1cf-2e564f28084a\n", + "│ ├── \u001b[94mquantity\u001b[0m = Concentration\n", + "│ └── \u001b[94mvalues\u001b[0m = [5.0, 10.0, 20.0, ...]\n", + "├── \u001b[94mregression_coefficients\u001b[0m = [1.0135636425894583, 0.05768828352388381, ...]\n", + "└── \u001b[94mdegree\u001b[0m = 1\n", + "\n", + "\u001b[4mCalibration\u001b[0m\n", + "├── \u001b[94mid\u001b[0m = 94115cf4-c6d5-4756-997c-e431378b28ac\n", + "├── \u001b[94mpeak_areas\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = 41a4c0e7-46f6-4102-b790-54d8ef169a1c\n", + "│ ├── \u001b[94mquantity\u001b[0m = Peak area\n", + "│ └── \u001b[94mvalues\u001b[0m = [0.0, 12168.0, ...]\n", + "├── \u001b[94mconcentrations\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = e6ae0432-d07f-4d02-b56b-265045b8404e\n", + "│ ├── \u001b[94mquantity\u001b[0m = Concentration\n", + "│ └── \u001b[94mvalues\u001b[0m = [0.0, 5.0, ...]\n", + "├── \u001b[94mregression_coefficients\u001b[0m = [3.146078039972546e-16, 0.0004109138724523339, ...]\n", + "└── \u001b[94mdegree\u001b[0m = 1\n", + "\n", + "\u001b[4mCalibration\u001b[0m\n", + "├── \u001b[94mid\u001b[0m = 683a26eb-79bc-4c6c-837f-617cb7f3577a\n", + "├── 
\u001b[94mpeak_areas\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = d894b18d-c1f7-4920-8553-64de6277e082\n", + "│ ├── \u001b[94mquantity\u001b[0m = Peak area\n", + "│ └── \u001b[94mvalues\u001b[0m = [1122.0, 4864.0, 7297.0, ...]\n", + "├── \u001b[94mconcentrations\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = 05af66ef-32ce-4a6e-86c7-d7e7353401ad\n", + "│ ├── \u001b[94mquantity\u001b[0m = Concentration\n", + "│ └── \u001b[94mvalues\u001b[0m = [0.5, 2.0, 3.0, ...]\n", + "├── \u001b[94mregression_coefficients\u001b[0m = [0.04225127219798618, 0.0004045205287514898, ...]\n", + "└── \u001b[94mdegree\u001b[0m = 1\n", + "\n", + "\u001b[4mCalibration\u001b[0m\n", + "├── \u001b[94mid\u001b[0m = 98bc4c48-7644-4fbd-bd1f-ef2092ca2a5e\n", + "├── \u001b[94mpeak_areas\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = a30301a1-a5fc-4c40-9e05-1448cb73baf5\n", + "│ ├── \u001b[94mquantity\u001b[0m = Peak area\n", + "│ └── \u001b[94mvalues\u001b[0m = [5727.0, 11991.0, ...]\n", + "├── \u001b[94mconcentrations\u001b[0m\n", + "│ └── \u001b[4mData\u001b[0m\n", + "│ ├── \u001b[94mid\u001b[0m = d38f9ca7-29bf-4547-a6d7-6899d29a4d33\n", + "│ ├── \u001b[94mquantity\u001b[0m = Concentration\n", + "│ └── \u001b[94mvalues\u001b[0m = [5.0, 10.0, ...]\n", + "├── \u001b[94mregression_coefficients\u001b[0m = [0.4286398467432958, 0.0007982120051085568, ...]\n", + "└── \u001b[94mdegree\u001b[0m = 1\n", + "\n" + ] + } + ], + "source": [ + "for species,item in species_data.items():\n", + "\n", + " # Create Calibration object and fit it to the given data\n", + " calibration = Calibration(\n", + " peak_areas=Data(\n", + " quantity=\"Peak area\", values=item[\"calibration\"][\"peak_areas\"]\n", + " ),\n", + " concentrations=Data(\n", + " quantity=Quantity.CONCENTRATION.value,\n", + " values=item[\"calibration\"][\"concentrations\"],\n", + " ),\n", + " )\n", + " calibration.calibrate()\n", + " \n", + " 
test.add_to_species_data( species = species, \n", + " chemical_formula = item[\"chemical_formula\"],\n", + " calibration = calibration,\n", + " correction_factor = item[\"correction_factor\"],\n", + " electron_transfer = item[\"electron_transfer\"]\n", + " )" ] }, { diff --git a/data/additional_data/specific_information.yaml b/data/additional_data/species_data.yaml similarity index 100% rename from data/additional_data/specific_information.yaml rename to data/additional_data/species_data.yaml diff --git a/specifications/datamodel.md b/specifications/datamodel.md index ad7438d..40a6c9d 100644 --- a/specifications/datamodel.md +++ b/specifications/datamodel.md @@ -68,9 +68,9 @@ This is the perliminary data model for CRC 1333 project B07. At the current time - correction_factor - Type: float - Description: correction factors of the individual species. -- faraday_coefficient +- electron_transfer - Type: float - Description: Faraday coefficients of the individual species. + - Description: Number of transferred electrons of the individual species. - faraday_efficiency - Type: Data - Description: Faraday efficiencies of the individual species.