Skip to content

Commit

Permalink
update several things
Browse files Browse the repository at this point in the history
update markdown
implement librarian
update parser
  • Loading branch information
sH4MbLe5 committed Sep 23, 2023
1 parent 23f9fc0 commit ac55b72
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 162 deletions.
3 changes: 1 addition & 2 deletions 2.2_Experimental_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
"metadata": {},
"source": [
"# <center>Workflow for the CRC1333 project B07 - Technical Chemistry</center>\n",
"# <center>Experimental notebook</center>\n",
"# <center>2.2 Analysis</center>"
"# <center>2.2 Experimental notebook - Analysis</center>"
]
},
{
Expand Down
8 changes: 8 additions & 0 deletions 3.1_DaRUS_upload.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# <center>Workflow for the CRC1333 project B07 - Technical Chemistry</center>\n",
"# <center>3.1 DaRUS upload</center>"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
1 change: 1 addition & 0 deletions datamodel_b07_tc/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .readers.gcparser import GCParser
from .readers.gstaticparser import GstaticParser
from .readers.mfmparser import MFMParser
from .readers.librarian import Librarian
from .calculus.calibrator import Calibrator
from .calculus.faraday_efficiency import FaradayEfficiencyCalculator
from .calculus.assign_peak_areas import PeakAreaAssignment
154 changes: 28 additions & 126 deletions datamodel_b07_tc/tools/readers/gcparser.py
Original file line number Diff line number Diff line change
@@ -1,119 +1,44 @@
import pandas as pd
import os

from pathlib import Path
from datamodel_b07_tc.core.data import Data
from datamodel_b07_tc.core.data import Quantity
from datamodel_b07_tc.core.metadata import Metadata
from pydantic import BaseModel

from datamodel_b07_tc.modified.measurement import Measurement
from datamodel_b07_tc.modified.data import Data
from datamodel_b07_tc.modified.metadata import Metadata

class GCParser:
def __init__(
self,
directory_paths: str | bytes | os.PathLike | Path,
filename_meta: str,
filename_exp: str,
):
"""Pass the path to a directory containing CSV-type files of the MFM to be
read.

Args:
path_to_directory (str | bytes | os.PathLike): Path to a directory containing CSV-type files.
"""
path_list_meta = []
for path in directory_paths:
path_list_meta.extend(Path(path).glob(filename_meta))
self._available_files_meta = {
count: file
for count, file in enumerate(path_list_meta)
if file.is_file()
}
path_list_exp = []
for path in directory_paths:
path_list_exp.extend(Path(path).glob(filename_exp))
self._available_files_exp = {
count: file
for count, file in enumerate(path_list_exp)
if file.is_file()
}
class GCParser(BaseModel):


def __repr__(self):
    """Return a short human-readable identifier for this parser."""
    return "GC experimental data parser"

def extract_exp_data(self, file_index: int) -> pd.DataFrame:
def extract_experimental_data(
    self, experimental_data_path: Path
) -> tuple[pd.DataFrame, list]:
    """Extract the GC experimental data block from a CSV file.

    Args:
        experimental_data_path (Path): Path to the file from which the
            experimental data are to be extracted.

    Returns:
        tuple[pandas.DataFrame, list]: The raw experimental data as a
            DataFrame, and a list of ``Data`` objects (one per column)
            carrying the column values and their unit.
    """
    # Column name -> unit mapping. Order matters: it supplies the column
    # names for the (header-less) CSV read below.
    quantity_unit_dict = {
        "Peak_number": None,
        "Retention_time": 's',
        "Signal": None,
        "Peak_type": None,
        "Peak_area": None,
        "Peak_height": None,
        "Peak_area_percentage": '%',
    }
    experimental_data_df = pd.read_csv(
        experimental_data_path,
        sep=",",
        # BUG FIX: the original used `quantity_unit_dict.keys` (the bound
        # method object, not called), which raises TypeError when pandas
        # iterates it. Iterating the dict directly yields the keys in
        # insertion order.
        names=list(quantity_unit_dict),
        engine="python",
        encoding="utf-16_le",
    )
    # Wrap each column in a Data object together with its unit.
    experimental_data_list = []
    for quantity, unit in quantity_unit_dict.items():
        experimental_data_list.append(
            Data(
                quantity=quantity,
                values=experimental_data_df[quantity],
                unit=unit,
            )
        )
    return experimental_data_df, experimental_data_list

def extract_metadata(self, file_index: int) -> pd.DataFrame:
"""Extract only data block as a `pandas.DataFrame`.
Expand All @@ -135,31 +60,8 @@ def extract_metadata(self, file_index: int) -> pd.DataFrame:
engine="python",
encoding="utf-16_le",
)
# rename = {
# "Parameter": "parameter",
# "Value": "value",
# "Description": "description",
# }
# rename(rename).
record = gc_metadata_df.to_dict(orient="records")
gc_metadata = {}
for i, entry in enumerate(record):
gc_metadata[i] = Metadata(**entry)
return gc_metadata_df, gc_metadata

@property
def available_meta_files(self) -> list[str]:
return self._available_files_meta
@property
def available_exp_files(self) -> list[str]:
return self._available_files_exp
# def enumerate_available_files(self) -> dict[int, str]:
# """Enumerate the CSV files available in the given directory and
# return a dictionary with their index and name.

# Returns:
# dict[int, str]: Indices and names of available files.
# """
# return {
# count: value for count, value in enumerate(self.available_files)
# }
metadata_list = []
for data in record:
metadata_list.append(Metadata(**data))
return gc_metadata_df, metadata_list
43 changes: 13 additions & 30 deletions datamodel_b07_tc/tools/readers/gstaticparser.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,18 @@
import pandas as pd
import os

from pathlib import Path
from pydantic import BaseModel
from datamodel_b07_tc.core.metadata import Metadata

class GstaticParser(BaseModel):

class GstaticParser:
def __init__(
self,
path_to_directory: str | bytes | os.PathLike | Path,
file_suffix: str,
):
"""Pass the path to a directory containing CSV-type files of the GC to be
read.
Args:
path_to_directory (str | bytes | os.PathLike): Path to a directory containing CSV-type files.
"""
file_search_pattern = "*." + file_suffix
path_list = list(Path(path_to_directory).glob(file_search_pattern))
self._available_files = {
count: file
for count, file in enumerate(path_list)
if file.is_file()
}

def __repr__(self):
    """Return a short human-readable identifier for this parser."""
    return "Gstatic parser"

def extract_metadata(self, file_index: int) -> dict: # filestem: str
def extract_metadata(self, metadata: Path) -> dict:
metadata_df = pd.read_csv(
self._available_files[file_index],
metadata,
sep="\t",
names=[
"Parameter",
"Data_type",
"Type",
"Value",
"Description",
],
Expand All @@ -56,10 +34,15 @@ def extract_metadata(self, file_index: int) -> dict: # filestem: str
)
)
return metadata_df, metadata_list









@property
def available_files(self) -> list[str]:
return self._available_files
# for line in open(self.file, 'r'):
# line = line.strip()
# if '=' in line:
Expand Down
45 changes: 43 additions & 2 deletions datamodel_b07_tc/tools/readers/librarian.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,46 @@
from pathlib import Path
from pydantic import BaseModel, PrivateAttr

class Librarian(BaseModel):
    """Browse a directory tree rooted at ``root_directory`` and enumerate
    its subdirectories and files as index -> Path mappings.

    Attributes:
        root_directory: Base directory against which all lookups resolve.
    """

    root_directory: Path

    def _resolve_directory(self, subdirectory=None) -> Path:
        """Resolve and validate the working directory.

        Shared by ``enumerate_subdirectories`` and ``enumerate_files``
        (the original duplicated this validation in both methods).

        Args:
            subdirectory: Optional path fragment appended to the root.

        Returns:
            Path: The validated directory.

        Raises:
            ValueError: If the result is not a ``Path`` or does not exist.
        """
        directory = self.root_directory
        if subdirectory is not None:  # was `!= None`; identity test is idiomatic
            directory = directory / subdirectory
        if not isinstance(directory, Path):
            raise ValueError('Not a valid Path object.')
        if not directory.exists():
            raise ValueError('Directory does not exist.')
        return directory

    def enumerate_subdirectories(self, subdirectory=None) -> dict:
        """Print and return an index -> Path mapping of subdirectories.

        Args:
            subdirectory: Optional path fragment appended to the root.

        Returns:
            dict: Mapping of enumeration index to subdirectory ``Path``.
        """
        directory = self._resolve_directory(subdirectory)
        dir_dict = {
            index: path
            for index, path in enumerate(
                p for p in directory.iterdir() if p.is_dir()
            )
        }
        print(f'Parent directory: \n {directory} \nAvailable subdirectories:')
        for index, path in dir_dict.items():
            print(f'{index}: /{path.name}')
        return dir_dict

    def enumerate_files(self, subdirectory=None, filter=None) -> dict:
        """Print and return an index -> Path mapping of files.

        Args:
            subdirectory: Optional path fragment appended to the root.
            filter: Optional file suffix (e.g. ``"csv"``) to restrict the
                listing. NOTE: shadows the builtin ``filter``; the name is
                kept for backward compatibility with existing callers.

        Returns:
            dict: Mapping of enumeration index to file ``Path``.
        """
        directory = self._resolve_directory(subdirectory)
        pattern = '*' if filter is None else '*.' + filter
        file_dict = {
            index: file
            for index, file in enumerate(directory.glob(pattern))
            if file.is_file()
        }
        print(f'Directory: \n {directory} \nAvailable files:')
        for index, file in file_dict.items():
            print(f'{index}: {file.name}')
        return file_dict

    @property
    def get_root_directory(self) -> Path:
        """The configured root directory."""
        return self.root_directory
4 changes: 2 additions & 2 deletions specifications/datamodel_b07_tc.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,8 @@ Stoichiometric information about the compound.
- abbreviation
- Type: string
- Description: abbreviation for the parameter.
- data_type
- Type: DataType, string
- type
- Type: DataType
- Description: type of the parameter.
- mode
- Type: string
Expand Down

0 comments on commit ac55b72

Please sign in to comment.