diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..690f93d Binary files /dev/null and b/.DS_Store differ diff --git a/1.1_Main.ipynb b/1.1_Main.ipynb index d4333b5..db204f7 100644 --- a/1.1_Main.ipynb +++ b/1.1_Main.ipynb @@ -24,10 +24,11 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ + "import os\n", "from pathlib import Path\n", "from sdRDM import DataModel" ] @@ -42,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -51,91 +52,28 @@ "# lib = DataModel.from_markdown('specifications/datamodel_b07_tc.md')" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Instantiate ```Dataset``` class of the data model. It is the root class of the data model containing all the other classes. The yet empty dataset is printed to showcase its structure in json format." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Note that there is a fundamental difference between a dataset and a data model! 
Whereas the data model holds the hierarchically structured python classes created by sdRDM, the dataset comprises only the python classes that are instantiated yet, where multiple instances of the same class can be allowed.**" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"id\": \"dataset0\",\n", - " \"__source__\": {\n", - " \"root\": \"Dataset\",\n", - " \"repo\": \"https://github.com/FAIRChemistry/datamodel_b07_tc.git\",\n", - " \"commit\": \"8d0ead376455dd3dd946d6ba534438a14ffcb7ae\",\n", - " \"url\": \"https://github.com/FAIRChemistry/datamodel_b07_tc/tree/8d0ead376455dd3dd946d6ba534438a14ffcb7ae\"\n", - " }\n", - "}\n" - ] - } - ], - "source": [ - "dataset = lib.Dataset()\n", - "print(dataset.json())" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Instantiate further classes of the data model and filling them with content, for example the title of the project, the authors and a project description into the data model. A class can either be instantiated ```empty``` , where the content is added ```afterwards```," - ] - }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "gen_inf = lib.GeneralInformation()\n", - "title = 'Electrocatalytic CO2-reduction on carbon'\n", - "gen_inf.title = title\n", - "description = 'The aim of this project is to blablabla'\n", - "gen_inf.description = description" + "dataset.general_information.add_to_authors" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "or ```directly``` when the object is ```created```." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "author_1 = lib.Author(name= 'Richard Schömig', affiliation = 'University of Stuttgart')\n", - "author_2 = lib.Author(name= 'Maximilian Schmidt', affiliation = 'University of Stuttgart')\n", - "gen_inf.authors = [author_1, author_2]" + "Instantiate the ```Dataset``` class of the data model. It is the root class of the data model containing all the other classes. The dataset is still empty at this point; ```print(dataset.json())``` shows its structure in JSON format." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The ```general_information``` is subsequently added to the dataset." + "**Note that there is a fundamental difference between a dataset and a data model! Whereas the data model holds the hierarchically structured python classes created by sdRDM, the dataset comprises only the python classes that have been instantiated so far, where multiple instances of the same class are allowed.**" ] }, { @@ -144,14 +82,27 @@ "metadata": {}, "outputs": [], "source": [ - "dataset.general_information = gen_inf" + "## Initialize the dataset ##\n", + "\n", + "dataset = lib.Dataset()\n", + "\n", + "# Define name of the project, description as well as authors #\n", + "dataset.general_information.title = 'Electrocatalytic CO2-reduction on carbon'\n", + "dataset.general_information.description = 'The aim of this project is to blablabla'\n", + "\n", + "# List with authors and their affiliation #\n", + "authors = [ 'Richard Schömig', 'Maximilian Schmidt' ] \n", + "affiliations = [ 'University of Stuttgart', 'University of Stuttgart' ]\n", + "\n", + "for aut,aff in zip(authors,affiliations):\n", + " dataset.general_information.add_to_authors( name=aut, affiliation=aff )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The framework of the dataset is with some metadata about the project is now set up and will be stored in a json file in the ```datasets/``` directory." 
+ "General metadata about the project has now been added to the dataset and will be stored in a JSON file in the ```datasets/``` directory." ] }, { @@ -160,12 +111,16 @@ "metadata": {}, "outputs": [], "source": [ - "current_directory = Path.cwd()\n", - "path_to_datasets = current_directory / 'datasets'\n", + "# Define name of dataset #\n", "dataset_name = 'b07.json'\n", + "\n", + "# Define output folder #\n", + "path_to_datasets = Path.cwd() / 'datasets'\n", "path_to_dataset_file = path_to_datasets / dataset_name\n", - "with open(path_to_dataset_file, \"w\") as f:\n", - " f.write(dataset.json())" + "\n", + "# Write file #\n", + "if not os.path.exists(path_to_datasets): os.makedirs(path_to_datasets)\n", + "with open(path_to_dataset_file, \"w\") as f: f.write(dataset.json())" ] } ], @@ -185,7 +140,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.11.5" }, "orig_nbformat": 4 }, diff --git a/2.1_Experimental_parsing.ipynb b/2.1_Experimental_parsing.ipynb index 4d0fe0e..d9b8edf 100644 --- a/2.1_Experimental_parsing.ipynb +++ b/2.1_Experimental_parsing.ipynb @@ -26,255 +26,76 @@ "source": [ "---\n", "## Section 0: Imports, Paths, and Logging\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "---\n", + "\n", "In this section all the necessary python packages are imported, the path to this notebook and the logger for this notebook is set up." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Activate autoreload." - ] - }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ + "# Activate autoreload so that edits to imported modules are picked up automatically #\n", "%reload_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import standard library python packages necessary to set up the ``logger``." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ + "%autoreload 2\n", + "\n", + "# Import standard libraries #\n", "import os\n", "import json\n", "import logging\n", - "import logging.config\n", - "from pathlib import Path" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Get path to the directory this notebook is located." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "root = Path(os.path.abspath(''))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set path to the directory containing the configuration file for the logger." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "logging_config_path = root / \"datamodel_b07_tc/tools/logging/config_exp_2_1.json\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set up logger by reading the .json-type configuration file." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "with open(logging_config_path) as logging_config_json:\n", - " logging_config = json.load(logging_config_json)\n", - "logging.config.dictConfig(logging_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a child of the root logger and set its name to the name of the current notebook." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "logger = logging.getLogger(__name__)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Set the level of several third-party module loggers to avoid dumping too much information in the log file.\n", - "
Info: Some third party modules use the same logging module and structure as this notebook, which is unproblematic, unless the level of their corresponding logging handlers is too low. In these cases the logging messages of lower levels, such as 'DEBUG' and 'INFO' are propagated to the parent logger of this notebook.
" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "third_party_module_loggers = ['markdown_it', 'h5py', 'numexpr', 'git']\n", - "for logger_ in third_party_module_loggers:\n", - " logging.getLogger(logger_).setLevel('WARNING')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import and instantiate the ``Librarian`` module for efficient and clean file and directory handling." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ + "from pathlib import Path\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "\n", + "# Import librarian module for file directory handling #\n", "from datamodel_b07_tc.tools import Librarian\n", - "librarian = Librarian(root_directory=root)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import modfied sdRDM object." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ + "\n", + "# Import modified sdRDM objects #\n", "from datamodel_b07_tc.modified.experiment import Experiment\n", "from datamodel_b07_tc.modified.measurement import Measurement\n", - "from datamodel_b07_tc.modified.plantsetup import PlantSetup" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
Info: Python objects created by the sdRDM generator can be equipped with additional features, such as functions or classes, e.g. to parse data or perform internal calculations, which allows for a more modular approach of working with them.
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import the data model containing all the objects of sdRDM's python API." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "# from sdRDM.generator import generate_python_api\n", - "from sdRDM import DataModel" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "
Info: sdRDM objects already imported are not overriden!
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Manually generate the sdRDM python objects." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# generate_python_api('specifications/datamodel_b07_tc.md', '', 'datamodel_b07_tc')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import tools used for parsing and calibration of the raw data." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ + "from datamodel_b07_tc.modified.plantsetup import PlantSetup\n", + "\n", + "# Import datamodel from sdRDM #\n", + "from sdRDM import DataModel\n", + "\n", + "# Import tools for parsing and calibration of the raw data #\n", "from datamodel_b07_tc.tools import Calibrator\n", "from datamodel_b07_tc.tools import gc_parser\n", "from datamodel_b07_tc.tools import gstatic_parser\n", "from datamodel_b07_tc.tools import mfm_parser\n", - "# from datamodel_b07_tc.tools import DEXPI2sdRDM" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import additional standard library python packages." 
+ "# from datamodel_b07_tc.tools import DEXPI2sdRDM\n", + "\n", + "# from sdRDM.generator import generate_python_api\n", + "# generate_python_api('specifications/datamodel_b07_tc.md', '', 'datamodel_b07_tc')" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "import ipywidgets as widgets\n", - "from IPython.display import display" + "# Define paths for logging output #\n", + "root = Path.cwd()\n", + "logging_config_path = root / \"datamodel_b07_tc/tools/logging/config_exp_2_1.json\"\n", + "\n", + "# Read in logger specs and configure logger (set name to current notebook). NOTE: dictConfig lives in the `logging.config` submodule, which `import logging` alone does not load #\n", + "with open(logging_config_path) as logging_config_json: logging.config.dictConfig( json.load( logging_config_json ) )\n", + "logger = logging.getLogger(__name__)\n", + "\n", + "# Set the level of third-party loggers to avoid dumping too much information #\n", + "third_party_module_loggers = ['markdown_it', 'h5py', 'numexpr', 'git']\n", + "for logger_ in third_party_module_loggers: logging.getLogger(logger_).setLevel('WARNING')\n", + "\n", + "# Initialize the librarian with root directory of this notebook #\n", + "librarian = Librarian(root_directory=root)\n", + "\n", + "# Info for loggers #\n", + "# Some third party modules use the same logging module and structure as this notebook, which is unproblematic, \n", + "# unless the level of their corresponding logging handlers is too low. In these cases the logging messages of \n", + "# lower levels, such as 'DEBUG' and 'INFO' are propagated to the parent logger of this notebook." ] }, { @@ -283,26 +104,13 @@ "source": [ "---\n", "## Section 1: Dataset and data model parsing\n", - "---" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ + "---\n", "In this section the data model and the dataset as well as all the output files necessary for the analysis notenook are parsed. 
" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print available subdirectories of the 'root' directory." - ] - }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -310,97 +118,41 @@ "output_type": "stream", "text": [ "Parent directory: \n", - " /mnt/c/Users/rscho/Documents/GitHub/datamodel_b07_tc \n", + " /Users/samir/Documents/PhD/SFB1333/datamodel_b07_tc \n", "Available subdirectories:\n", - "0: .../.git\n", - "1: .../.github\n", - "2: .../.vscode\n", - "3: .../data\n", - "4: .../datamodel_b07_tc\n", - "5: .../datasets\n", - "6: .../logging\n", - "7: .../specifications\n" - ] - } - ], - "source": [ - "root_subdirectories = librarian.enumerate_subdirectories(directory=root)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "List all available dataset json files in the 'datasets' directory." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ + "0: .../specifications\n", + "1: .../datasets\n", + "2: .../datamodel_b07_tc\n", + "3: .../.github\n", + "4: .../.git\n", + "5: .../data\n", + "\n", + "\n", "Directory: \n", - " /mnt/c/Users/rscho/Documents/GitHub/datamodel_b07_tc/datasets \n", + " /Users/samir/Documents/PhD/SFB1333/datamodel_b07_tc/datasets \n", "Available files:\n", - "0: b07.json\n" + "0: b07.json\n", + "\n", + "\n" ] } ], "source": [ - "json_dataset_files = librarian.enumerate_files(directory=root_subdirectories[5], filter='json')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Choose dataset to be loaded by its index." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ + "# Check for all available subdirectories #\n", + "root_subdirectories = librarian.enumerate_subdirectories(directory=root)\n", + "print(\"\\n\")\n", + "\n", + "# Search for subdirectory \"datasets\" and in it for all dataset json files #\n", + "idx_dataset = [i for i in range(len(root_subdirectories)) if \"datasets\" in str(root_subdirectories[i])][0]\n", + "json_dataset_files = librarian.enumerate_files(directory=root_subdirectories[idx_dataset], filter='json')\n", + "print(\"\\n\")\n", + "\n", + "# Choose dataset: use the index given. e.g.: 0, 1, .. #\n", "json_dataset = json_dataset_files[0]\n", - "dataset, lib = DataModel.parse(json_dataset)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Visualize the data model." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# lib.Dataset.meta_tree()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print current status of the dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# print(dataset.json())" + "dataset, lib = DataModel.parse(json_dataset)\n", + "\n", + "# Optionally visualize the data model as a tree (uncomment the following line to enable) #\n", + "#lib.Dataset.meta_tree()" ] }, { @@ -2735,7 +2487,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.11.5" }, "orig_nbformat": 4, "vscode": { diff --git a/datamodel_b07_tc/tools/logging/config_exp_2_1.json b/datamodel_b07_tc/tools/logging/config_exp_2_1.json index b875f3a..b4f50e3 100644 --- a/datamodel_b07_tc/tools/logging/config_exp_2_1.json +++ b/datamodel_b07_tc/tools/logging/config_exp_2_1.json @@ -12,7 +12,7 @@ "class": "logging.FileHandler", "level": "DEBUG", "formatter": "simple", - "filename": "logging/log_exp_2_1.log", + "filename": "datamodel_b07_tc/tools/logging/log_exp_2_1.log", "mode": "a" }, "console": {