Skip to content

Commit

Permalink
adapt species data
Browse files Browse the repository at this point in the history
  • Loading branch information
samirdarouich committed Mar 4, 2024
1 parent a31a446 commit 5006171
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 87 deletions.
23 changes: 23 additions & 0 deletions FAIRFlowChemistry/core/calibration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sdRDM
import numpy as np

from typing import Dict, List, Optional
from pydantic import PrivateAttr, model_validator
Expand Down Expand Up @@ -67,3 +68,25 @@ def _parse_raw_xml_data(self):
elif isinstance(value, _Element):
self._raw_xml_data[attr] = elem2dict(value)
return self

def calibrate(self):
    """
    Fit the polynomial regression model to the stored calibration data.

    The peak areas act as the independent variable and the concentrations as
    the dependent variable. The fitted coefficients are written to
    ``self.regression_coefficients`` as a plain list in the order returned by
    ``numpy.polynomial.polynomial.polyfit`` (lowest degree first).
    """

    areas = self.peak_areas.values
    concentrations = self.concentrations.values

    fitted = np.polynomial.polynomial.polyfit(areas, concentrations, self.degree)

    # Store a plain Python list rather than a numpy array.
    self.regression_coefficients = fitted.tolist()

def predict(self, x: list) -> np.ndarray:
    """
    Predict with regression model

    Evaluates the polynomial defined by ``self.regression_coefficients``
    (lowest degree first) at the given locations.

    Args:
        x (1D list): New locations for which predictions should be made
    Returns:
        (1D numpy array): Predicted data at new locations
    Raises:
        ValueError: If no regression coefficients are available, i.e. the
            model has not been calibrated yet.
    """

    coefficients = self.regression_coefficients

    # Fail early with an actionable message instead of an opaque numpy error
    # when the model has not been fitted.
    if coefficients is None or len(coefficients) == 0:
        raise ValueError(
            "No regression coefficients available; call calibrate() before predict()."
        )

    return np.polynomial.Polynomial(coefficients)(np.array(x))
38 changes: 37 additions & 1 deletion FAIRFlowChemistry/core/experiment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sdRDM
import pandas as pd

from typing import Dict, List, Optional
from pydantic import PrivateAttr, model_validator
Expand All @@ -16,7 +17,8 @@
from .plantsetup import PlantSetup
from .metadata import Metadata
from .calibration import Calibration

from .quantity import Quantity
from .datatype import DataType

@forge_signature
class Experiment(sdRDM.DataModel):
Expand Down Expand Up @@ -133,3 +135,37 @@ def add_to_species_data(
params["id"] = id
self.species_data.append(SpeciesData(**params))
return self.species_data[-1]

@property
def volumetric_flow_time_course(self) -> list:
    """Extract the volumetric flow time course from the MFM measurements of the experiment.

    The timestamps and flow values of all measurements of type
    "MFM measurement" are concatenated in the order the measurements are
    returned by ``self.get``.

    Returns:
        list: Two-element list ``[datetimes, flow_values]``. Both entries are
            empty lists if no MFM data was collected.
    """
    volumetric_flow_datetime_list = []
    volumetric_flow_values_list = []

    mfm_measurements = self.get(
        "measurements", "measurement_type", "MFM measurement"
    )[0]
    for mfm_measurement in mfm_measurements:
        volumetric_flow_datetime_list.extend(
            mfm_measurement.get(
                "experimental_data", "quantity", Quantity.DATETIME.value
            )[0][0].values
        )
        volumetric_flow_values_list.extend(
            mfm_measurement.get(
                "experimental_data", "quantity", Quantity.VOLUMETRICFLOWRATE.value
            )[0][0].values
        )

    # If data is directly read in from the experiment, it is the correct format, if read
    # from json dataset, it is a string and needs to be converted.
    # The emptiness check avoids an IndexError when no MFM data was collected.
    # NOTE(review): this compares the Python type of the first entry with
    # DataType.DATETIME.value; confirm that value is a type object and not a
    # string label, otherwise the conversion below runs unconditionally.
    if (
        volumetric_flow_datetime_list
        and type(volumetric_flow_datetime_list[0]) != DataType.DATETIME.value
    ):
        volumetric_flow_datetime_list = [
            pd.to_datetime(timestamp, format="%Y-%m-%dT%H:%M:%S").to_pydatetime()
            for timestamp in volumetric_flow_datetime_list
        ]

    return [volumetric_flow_datetime_list, volumetric_flow_values_list]
2 changes: 2 additions & 0 deletions FAIRFlowChemistry/tools/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from FAIRFlowChemistry.core import ComponentType




def gc_parser(metadata_path: Path, experimental_data_path: Path):
"""
Function that reads in a file from a gas chromatography. Important information that is extracted is the
Expand Down
253 changes: 169 additions & 84 deletions Main.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -139,111 +139,196 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"YAML Output:\n",
"Carbon dioxide:\n",
" chemical_formula: CO2\n",
" concentrations:\n",
" - 0\n",
" - 50\n",
" peak_areas:\n",
" - 0\n",
" - 38653\n",
"Carbon monoxide:\n",
" chemical_formula: CO\n",
" concentrations:\n",
" - 0.5\n",
" - 1\n",
" - 5\n",
" peak_areas:\n",
" - 797\n",
" - 1328\n",
" - 7223\n",
"Ethane:\n",
" chemical_formula: C2H6\n",
" concentrations:\n",
" - 0\n",
" - 5\n",
" peak_areas:\n",
" - 0\n",
" - 12168\n",
"Ethene:\n",
" chemical_formula: C2H4\n",
" concentrations:\n",
" - 0.5\n",
" - 2\n",
" - 3\n",
" peak_areas:\n",
" - 1122\n",
" - 4864\n",
" - 7297\n",
"Hydrogen:\n",
" chemical_formula: H2\n",
" concentrations:\n",
" - 5\n",
" - 10\n",
" - 20\n",
" peak_areas:\n",
" - 71\n",
" - 153\n",
" - 330\n",
"Methane:\n",
" chemical_formula: CH4\n",
" concentrations:\n",
" - 5\n",
" - 10\n",
" peak_areas:\n",
" - 5727\n",
" - 11991\n",
"\n"
]
}
],
"outputs": [],
"source": [
"## Change initalizing species data!"
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'Hydrogen': {'chemical_formula': 'H2',\n",
" 'peak_areas': [71, 153, 330],\n",
" 'concentrations': [5, 10, 20]},\n",
"{'Carbon dioxide': {'chemical_formula': 'CO2',\n",
" 'calibration': {'concentrations': [0, 50], 'peak_areas': [0, 38653]},\n",
" 'correction_factor': 0.74,\n",
" 'electron_transfer': 2},\n",
" 'Carbon monoxide': {'chemical_formula': 'CO',\n",
" 'peak_areas': [797, 1328, 7223],\n",
" 'concentrations': [0.5, 1, 5]},\n",
" 'Carbon dioxide': {'chemical_formula': 'CO2',\n",
" 'peak_areas': [0, 38653],\n",
" 'concentrations': [0, 50]},\n",
" 'Methane': {'chemical_formula': 'CH4',\n",
" 'peak_areas': [5727, 11991],\n",
" 'concentrations': [5, 10]},\n",
" 'Ethene': {'chemical_formula': 'C2H4',\n",
" 'peak_areas': [1122, 4864, 7297],\n",
" 'concentrations': [0.5, 2, 3]},\n",
" 'calibration': {'concentrations': [0.5, 1, 5],\n",
" 'peak_areas': [797, 1328, 7223]},\n",
" 'correction_factor': 1.0,\n",
" 'electron_transfer': 2},\n",
" 'Hydrogen': {'chemical_formula': 'H2',\n",
" 'calibration': {'concentrations': [5, 10, 20], 'peak_areas': [71, 153, 330]},\n",
" 'correction_factor': 1.01,\n",
" 'electron_transfer': 2},\n",
" 'Ethane': {'chemical_formula': 'C2H6',\n",
" 'peak_areas': [0, 12168],\n",
" 'concentrations': [0, 5]}}"
" 'calibration': {'concentrations': [0, 5], 'peak_areas': [0, 12168]},\n",
" 'correction_factor': None,\n",
" 'electron_transfer': 16},\n",
" 'Ethene': {'chemical_formula': 'C2H4',\n",
" 'calibration': {'concentrations': [0.5, 2, 3],\n",
" 'peak_areas': [1122, 4864, 7297]},\n",
" 'correction_factor': None,\n",
" 'electron_transfer': 12},\n",
" 'Methane': {'chemical_formula': 'CH4',\n",
" 'calibration': {'concentrations': [5, 10], 'peak_areas': [5727, 11991]},\n",
" 'correction_factor': 0.76,\n",
" 'electron_transfer': 8}}"
]
},
"execution_count": 9,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"json_data"
"import yaml\n",
"from FAIRFlowChemistry.core import Calibration, Data, Quantity, Experiment\n",
"\n",
"\n",
"with open(\"data/additional_data/species_data.yaml\") as f:\n",
" species_data = yaml.safe_load(f)\n",
"species_data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"test = Experiment()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[4mCalibration\u001b[0m\n",
"├── \u001b[94mid\u001b[0m = bcef0a33-0954-40e8-84ce-812c2aa34286\n",
"├── \u001b[94mpeak_areas\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = 6dbccfbd-2c00-478e-89d9-f17a37cdad92\n",
"│ ├── \u001b[94mquantity\u001b[0m = Peak area\n",
"│ └── \u001b[94mvalues\u001b[0m = [0.0, 38653.0, ...]\n",
"├── \u001b[94mconcentrations\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = 7824671b-6ad3-48e5-a08e-75fda85486bd\n",
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n",
"│ └── \u001b[94mvalues\u001b[0m = [0.0, 50.0, ...]\n",
"├── \u001b[94mregression_coefficients\u001b[0m = [-4.466559424312742e-15, 0.0012935606550591155, ...]\n",
"└── \u001b[94mdegree\u001b[0m = 1\n",
"\n",
"\u001b[4mCalibration\u001b[0m\n",
"├── \u001b[94mid\u001b[0m = 66c51884-1dae-451e-b103-e6825139a3d4\n",
"├── \u001b[94mpeak_areas\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = 23ce97b3-17cc-461d-9611-007fb871847e\n",
"│ ├── \u001b[94mquantity\u001b[0m = Peak area\n",
"│ └── \u001b[94mvalues\u001b[0m = [797.0, 1328.0, 7223.0, ...]\n",
"├── \u001b[94mconcentrations\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = aac491a0-c4f5-4089-bb3b-e6da4fa87d5b\n",
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n",
"│ └── \u001b[94mvalues\u001b[0m = [0.5, 1.0, 5.0, ...]\n",
"├── \u001b[94mregression_coefficients\u001b[0m = [0.012656829292048657, 0.0006912740171292097, ...]\n",
"└── \u001b[94mdegree\u001b[0m = 1\n",
"\n",
"\u001b[4mCalibration\u001b[0m\n",
"├── \u001b[94mid\u001b[0m = abcb5f13-bdc7-4184-81e3-72f43d209ba1\n",
"├── \u001b[94mpeak_areas\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = cf425f61-3655-40af-a425-e6b1d50b70df\n",
"│ ├── \u001b[94mquantity\u001b[0m = Peak area\n",
"│ └── \u001b[94mvalues\u001b[0m = [71.0, 153.0, 330.0, ...]\n",
"├── \u001b[94mconcentrations\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = 0531cb42-b9e1-4065-a1cf-2e564f28084a\n",
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n",
"│ └── \u001b[94mvalues\u001b[0m = [5.0, 10.0, 20.0, ...]\n",
"├── \u001b[94mregression_coefficients\u001b[0m = [1.0135636425894583, 0.05768828352388381, ...]\n",
"└── \u001b[94mdegree\u001b[0m = 1\n",
"\n",
"\u001b[4mCalibration\u001b[0m\n",
"├── \u001b[94mid\u001b[0m = 94115cf4-c6d5-4756-997c-e431378b28ac\n",
"├── \u001b[94mpeak_areas\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = 41a4c0e7-46f6-4102-b790-54d8ef169a1c\n",
"│ ├── \u001b[94mquantity\u001b[0m = Peak area\n",
"│ └── \u001b[94mvalues\u001b[0m = [0.0, 12168.0, ...]\n",
"├── \u001b[94mconcentrations\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = e6ae0432-d07f-4d02-b56b-265045b8404e\n",
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n",
"│ └── \u001b[94mvalues\u001b[0m = [0.0, 5.0, ...]\n",
"├── \u001b[94mregression_coefficients\u001b[0m = [3.146078039972546e-16, 0.0004109138724523339, ...]\n",
"└── \u001b[94mdegree\u001b[0m = 1\n",
"\n",
"\u001b[4mCalibration\u001b[0m\n",
"├── \u001b[94mid\u001b[0m = 683a26eb-79bc-4c6c-837f-617cb7f3577a\n",
"├── \u001b[94mpeak_areas\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = d894b18d-c1f7-4920-8553-64de6277e082\n",
"│ ├── \u001b[94mquantity\u001b[0m = Peak area\n",
"│ └── \u001b[94mvalues\u001b[0m = [1122.0, 4864.0, 7297.0, ...]\n",
"├── \u001b[94mconcentrations\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = 05af66ef-32ce-4a6e-86c7-d7e7353401ad\n",
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n",
"│ └── \u001b[94mvalues\u001b[0m = [0.5, 2.0, 3.0, ...]\n",
"├── \u001b[94mregression_coefficients\u001b[0m = [0.04225127219798618, 0.0004045205287514898, ...]\n",
"└── \u001b[94mdegree\u001b[0m = 1\n",
"\n",
"\u001b[4mCalibration\u001b[0m\n",
"├── \u001b[94mid\u001b[0m = 98bc4c48-7644-4fbd-bd1f-ef2092ca2a5e\n",
"├── \u001b[94mpeak_areas\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = a30301a1-a5fc-4c40-9e05-1448cb73baf5\n",
"│ ├── \u001b[94mquantity\u001b[0m = Peak area\n",
"│ └── \u001b[94mvalues\u001b[0m = [5727.0, 11991.0, ...]\n",
"├── \u001b[94mconcentrations\u001b[0m\n",
"│ └── \u001b[4mData\u001b[0m\n",
"│ ├── \u001b[94mid\u001b[0m = d38f9ca7-29bf-4547-a6d7-6899d29a4d33\n",
"│ ├── \u001b[94mquantity\u001b[0m = Concentration\n",
"│ └── \u001b[94mvalues\u001b[0m = [5.0, 10.0, ...]\n",
"├── \u001b[94mregression_coefficients\u001b[0m = [0.4286398467432958, 0.0007982120051085568, ...]\n",
"└── \u001b[94mdegree\u001b[0m = 1\n",
"\n"
]
}
],
"source": [
"for species,item in species_data.items():\n",
"\n",
" # Create Calibration object and fit it to the given data\n",
" calibration = Calibration(\n",
" peak_areas=Data(\n",
" quantity=\"Peak area\", values=item[\"calibration\"][\"peak_areas\"]\n",
" ),\n",
" concentrations=Data(\n",
" quantity=Quantity.CONCENTRATION.value,\n",
" values=item[\"calibration\"][\"concentrations\"],\n",
" ),\n",
" )\n",
" calibration.calibrate()\n",
" \n",
" test.add_to_species_data( species = species, \n",
" chemical_formula = item[\"chemical_formula\"],\n",
" calibration = calibration,\n",
" correction_factor = item[\"correction_factor\"],\n",
" electron_transfer = item[\"electron_transfer\"]\n",
" )"
]
},
{
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions specifications/datamodel.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ This is the preliminary data model for CRC 1333 project B07. At the current time
- correction_factor
- Type: float
- Description: correction factors of the individual species.
- faraday_coefficient
- electron_transfer
- Type: float
- Description: Faraday coefficients of the individual species.
- Description: Number of transferred electrons of the individual species.
- faraday_efficiency
- Type: Data
- Description: Faraday efficiencies of the individual species.
Expand Down

0 comments on commit 5006171

Please sign in to comment.