From af236396f21c3ee5f407349adc5b0a5ad06c51a3 Mon Sep 17 00:00:00 2001 From: Clark Wang <107419732+OEG-Clark@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:48:17 +0100 Subject: [PATCH 1/4] UPDATE SolarRAG-CLI Update the RAG pipeline, supporting FactRAG and NaiveRAG together --- CLI/CLI_old/README.md | 59 ++++ CLI/CLI_old/Result_Spec.md | 87 ++++++ .../code/__pycache__/process.cpython-310.pyc | Bin 0 -> 3826 bytes CLI/CLI_old/code/cli.py | 278 +++++++++++++++++ CLI/CLI_old/code/data/context.json | 154 ++++++++++ CLI/CLI_old/code/data/paper1_extraction.json | 14 + CLI/CLI_old/code/data/prompts.json | 1 + CLI/CLI_old/code/process.py | 184 ++++++++++++ CLI/CLI_old/code/run_benchmark.py | 247 +++++++++++++++ CLI/CLI_old/code/run_grobid.sh | 16 + CLI/CLI_old/code/setting/config.json | 7 + CLI/README.md | 71 ++--- CLI/Result_Example/SolarFactRAG_Example.json | 1 + CLI/Result_Example/SolarNavieRAG_Example.json | 1 + CLI/Result_Spec.md | 159 +++++----- CLI/code/FactsRAG.py | 280 ++++++++++++++++++ CLI/code/process.py | 92 ++---- CLI/code/prompt.json | 9 + CLI/code/run_factrag.sh | 29 ++ CLI/code/settings/config.json | 7 + 20 files changed, 1499 insertions(+), 197 deletions(-) create mode 100644 CLI/CLI_old/README.md create mode 100644 CLI/CLI_old/Result_Spec.md create mode 100644 CLI/CLI_old/code/__pycache__/process.cpython-310.pyc create mode 100644 CLI/CLI_old/code/cli.py create mode 100644 CLI/CLI_old/code/data/context.json create mode 100644 CLI/CLI_old/code/data/paper1_extraction.json create mode 100644 CLI/CLI_old/code/data/prompts.json create mode 100644 CLI/CLI_old/code/process.py create mode 100644 CLI/CLI_old/code/run_benchmark.py create mode 100644 CLI/CLI_old/code/run_grobid.sh create mode 100644 CLI/CLI_old/code/setting/config.json create mode 100644 CLI/Result_Example/SolarFactRAG_Example.json create mode 100644 CLI/Result_Example/SolarNavieRAG_Example.json create mode 100644 CLI/code/FactsRAG.py create mode 100644 CLI/code/prompt.json create mode 100644 
CLI/code/run_factrag.sh create mode 100644 CLI/code/settings/config.json diff --git a/CLI/CLI_old/README.md b/CLI/CLI_old/README.md new file mode 100644 index 0000000..765046f --- /dev/null +++ b/CLI/CLI_old/README.md @@ -0,0 +1,59 @@ +# Solar-QA-CLI +This repository contains the command-line tool for the [solar-qa pipeline](https://github.com/oeg-upm/solar-qa-eval) + +## Requirement + +### Requirement for Paper Extraction + +- [Grobid](https://grobid.readthedocs.io/en/latest/) + +### Requirement for Generation + +All required libraries are detailed in requirement.txt. + +```console +pip install -r requirement.txt +``` + + +### Usage + +1. Install [GROBID](https://grobid.readthedocs.io/en/latest/) on your computing device +2. Start the GROBID server on your local device following the instructions from the [GROBID WEBSITE](https://grobid.readthedocs.io/en/latest/). +3. With the running GROBID server, upload the configuration of GROBID in sub-folder `.../CLI/setting/config.json`. The file is given below: + +```json +{ + "grobid_server": "http://localhost:8070", + "batch_size": 1000, + "sleep_time": 5, + "timeout": 60, + "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] +} +``` + + +4. Run the entire command-line tool by running `cli.py` in the directory `.../CLI/code/cli.py`. The command-line options of `cli.py` are given below: +```json +{ + "--use_platform": whether to use an online platform or a local model for the llm(generation model). 
option = ["True", "False"] + "--user_key": the user key or token for the online platform, type="str" + "--llm_id": the reference id for the llm(generation model), type="str" + "--hf_key": your huggingface token, this is required to use the similarity model, type="str" + "--llm_platform": indication of which llm online platform you wish to use, option=["groq"] + "--sim_model_id": the reference id for the similarity model, type="str" + "--input_file_path": the path of the pdf file that you wish to analyze, type="str", file type=.pdf + "--prompt_file_pdf": the path of the json file that contains your prompt, file type=.json + "--context_file_path": the path where you wish to save the output file, file type=.json +} +``` +*Example without an online platform:* +```console +python cli.py --use_platform False --hf_key YOUR_HF_KEY --llm_id meta-llama/Llama-3.2-3B-Instruct --sim_model_id Salesforce/SFR-Embedding-Mistral --input_file_path .../test.pdf --prompt_file_pdf .../prompts.json --context_file_path .../context.json +``` +*Example with an online platform:* +```console +python cli.py --use_platform True --user_key YOUR_USER_KEY --hf_key YOUR_HF_KEY --llm_id llama-3.1-70b-versatile --llm_platform groq --sim_model_id Salesforce/SFR-Embedding-Mistral --input_file_path .../test.pdf --prompt_file_pdf .../prompts.json --context_file_path .../context.json +``` + +5. Result format is given at `Result_Spec.md` \ No newline at end of file diff --git a/CLI/CLI_old/Result_Spec.md b/CLI/CLI_old/Result_Spec.md new file mode 100644 index 0000000..808defb --- /dev/null +++ b/CLI/CLI_old/Result_Spec.md @@ -0,0 +1,87 @@ +## CLI Result Specification + +### Generation Model +This item contains the large language model (llm) reference id. + +*Example:* + +```json +"generation_model": "meta-llama/Llama-3.2-3B-Instruct" +``` + + +### Similarity Model +This item contains the similarity model reference id (the model is used in RAG searching stage). 
+ +*Example:* + +```json +"similarity_model": "Salesforce/SFR-Embedding-Mistral" +``` + + +### Similarity Metric +This item contains the calculation metric used in the RAG searching stage + +*Example:* + +```json +"similarity_metric": "Cosine_Similarity" +``` + + +### Result +This item contains all relevant generation results, which include: + +| Property | Mandatory? | Expected Value | Definition | +| :----- | :---- | :---- | :---- | +| **question_category** | Yes | String | This item refers to what the query is about, five possible choices are given below | +| **query** | Yes | String | This item contains the prompt used for the llm generation | +| **generation** | Yes | Dictionary | This item contains the generation result from the llm model, formatted as question_category: answer in a dictionary, as given in the table below | +| **evidence** | Yes | List of dictionaries | This item contains the evidence to support the RAG algorithm, each entry contains **pdf_reference** and **similarity_score** | +| **pdf_reference** | Yes | String | This item contains the original text extracted by the similarity model | +| **similarity_score** | Yes | Float | This item contains the similarity score that is calculated between the query embedding and the pdf_reference embedding, the calculation metric is the similarity metric. | + +*generation format:* + +Generation contains the result from llm's generation, the result only has one category of the item/items, with the corresponding expected value/values. +The choices for **individual category** and **expected value** are given below. + +```json +"generation":{ + "individual category": "expected value", + ... 
+} +``` + +*Choices for question_category and expected value, as the choices for generation:* + +| Category | Expected Value |Definition | +| :----- | :---- | :---- | +| **catalyst/co_catalyst** | The catalyst and co_catalyst used in the experiment |The query is about the catalyst condition | +| **light_source/lamp** | **light_source**: 'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator'
**lamp**: 'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator' | The query is about the light usage condition of the experiment | +| **reaction_medium** | 'Liquid', 'Gas' | The query is about the reaction medium used in the experiment | +| **reactor_type** | 'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed' | The query is about the type of the reactor used in the experiment | +| **operation_mode** | 'Batch', 'Continuous', 'Batch/Continuous' | The query is about how the operation is conducted | + + + +*Example:* +```json +"question_category": "catalyst/co_catalyst", +"query": "\nPlease find the name of the catalyst...", +"generation": { + "catalyst": "TiO2", + "co_catalyst": "Cu" +}, +"evidence": [ + { + "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", + "similarity_score": 0.4707722067832947 + }, + { + "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", + "similarity_score": 0.49091827869415283 + }, + ... 
+] \ No newline at end of file diff --git a/CLI/CLI_old/code/__pycache__/process.cpython-310.pyc b/CLI/CLI_old/code/__pycache__/process.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46f437440408bf91bb39c6003a72a8fe46e94396 GIT binary patch literal 3826 zcmbVPTaz0{74DuHjkJ>X;`OfaC81_hm_#_17APvo#yA-7E*O$cV!IL;ucMls?$wSn zni1Wjy$FdiwXTZ&kUu~OrG=+F^AF&K-!QK{`6u8i0^ey#-d$5XfTrej_vtfzIp;eU zrPI@8hToqq{d4~4s4Z)H+?8<>j7M#$ zbe-)94XnN;oC{18KDE?E z^xX^WV|IsSmMD&9zF=Qkhfw#&Q1~6@u*^PqhqHmrS#JhXrM`VE@k_`I?BCcVIqL24 zJyx6AxVv5}`jwVUqzW>5YgPO6xR*}jNMPLDk%5rPUp&bRqb$fG2>FXq62@I2n=06A z9<$G%WCz>ZQ5=mwn9XF=4AWGJD8cgDKci)f^wiWKGJy)8awz8M--wXzdc`%RQnljs zme%$|*`bX+(N(aA_rdDAH`Y6bD_+gX-O!lZuZ;Q4j@Zr%9f)W7)bj;uj$hPL?M9)@ z9UM>kw$h!P%deQ4#-`7f8aruQHXfwA2SL)flg5E++J;jCB>{qBX9KmOn9$ z8}TPh*aQBU!$8hs7|OVBq07}bg*~+U*3dbE^soTM^Uv64d>aleijPVI8+~pfTWR2k za^Dt}LoTYnx5RYcX)!VLM=oX$EyI~LoH=mLj5+gtN1PG!Eo;JQIfDXtXT`#ZH*ou| zYW535SDfv;w8{}eM!x{lFZNcwH7F;Lo|a(<$AVdvMEDEfiyG#QJ@`G`G7_oxi}&6I zXSXY3LPe^)QFyI;xecf8UE0aA&Pt=Px3^cv_-a^BRjZNc#&_$>3fY78s`W+#Gb>q? 
z#d716b_wh*UXs%vfwZ6{!5<@!Twq^7vwhGnoq+}YvOaFG%I+@=kRxCNduWkn2TtEn zm-~EZXXufh4>{fRS7$^4Qt0Ex?nckSDSoY9#vVBzn1iW#3~Fv(+>R0v#BuICNTVb# zMZh^BBzJ>OM<&96LR}=`LSD*(R+G|4UL2_?%Na*!oW^uj6?%EhqkFt?JiRd&XP-gTiqJ3=3=#5qBwLPvtiMAK?zBidc|5rge5 zN_REBVX}I4sM_pg1ZD>F1p~re3PSpIr zSz^48J~r3$_3!4qa{`2w+#%G#qk>jm0_OBylkIx^WBd#M z)pS=NqrV4Iwo2Twt0-*d`HWpHSI?k`;qJd2@SoFXRs~Yv&2h(4KgMULe3<-+{#lsv zCPt^Xkg*Ke^w1(dRYF0>2je1l(G)-SsLZ8UFL>tm?L$72 zb{UFZhl*aTVOmr27P=o|OYZ|yuLoW?@d6d@M7yaeClAIsZdYO^@Zty#MM=<0w;sqW z)!xtUyuV@6Ul@pBlsEtS=LA`>fasE{-UU;xo+wMbY)8`4_1^janCb)o@`|@nE1DuO zua1jSlUa=76e%h%#%ZesR47sz3Y<)#)SX6Y+oa-k^be#;pg>{PL18Wx`KqF_=Ec%y zdQ!HUy+cUob0C&gwqQfc2Y773Qmia5y92M8~QzJzoIbWo7Br_#t zeQgB7yXZqH#L6otc%-M;<) zZB?S#RU%X}kMc30OVKe;T-3nYAi8v|or-QOuX9M4*i~io0PN(Mv&%LBY?bCqQ`I;B E4Ymv$aR2}S literal 0 HcmV?d00001 diff --git a/CLI/CLI_old/code/cli.py b/CLI/CLI_old/code/cli.py new file mode 100644 index 0000000..fe1cf8c --- /dev/null +++ b/CLI/CLI_old/code/cli.py @@ -0,0 +1,278 @@ +# File : pipeline.py +# Time :2024/7/14 14:11 +# Author :ClarkWang +# Contact :wzyyyyyy0519@gmail +# Description: + +# !pip install --upgrade langchain langchain_groq +# !pip install --upgrade langchain-community +# !pip install --upgrade langchain-core +# !pip install --upgrade langsmith +# !pip install ctransformers[cuda] +# !pip install huggingface-hub +# !pip install --upgrade sqlalchemy +# !pip install rdflib +# !pip install llama-cpp-python +# !pip install typing-extensions==4.7.1 --upgrade +# !pip install pypdf2 +# !pip install sentence-transformers==2.7.0 +# !pip install --upgrade transformers +# !pip install faiss-cpu +# !pip install bitsandbytes accelerate + +import subprocess +from multiprocessing import Pool, Process +import time + + +background_process = Process(target=subprocess.Popen, args=(["bash", "run_grobid.sh"],)) +background_process.start() +print(background_process.is_alive()) +time.sleep(15) + +# client = GrobidClient(config_path="./setting/config.json") + + + +from 
grobid_client.grobid_client import GrobidClient +import os +import json +import argparse + +import torch +import transformers +from huggingface_hub import login +from langchain_groq import ChatGroq +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.vectorstores import FAISS +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain.prompts import PromptTemplate +from langchain.chains import RetrievalQA +from process import * + + + +# client = GrobidClient(config_path="./setting/config.json") + +def get_context(context): + res = [] + for item in context: + res.append(item.page_content) + return res + +def clean_gen(gen): + res = {} + for line in gen.split("\n"): + if ":" in line: + try: + s, e = line.split(":") + res[s.strip()] = e + except: + pass + return res + +def str2bool(v): + return v.lower() in ("yes", "true", "t", "1") + +class SolarQA: + def __init__(self, use_platform, user_key, llm_id, hf_key, llm_platform="LOCAL", temperature=0.1, sim_model_id="Salesforce/SFR-Embedding-Mistral", input_file_path=str(), context_file_path=""): + # print(input_file_path) + self.use_platform = use_platform + self.llm_id = llm_id + self.user_key = user_key + self.hf_key = hf_key + self.llm_platform = llm_platform.lower() + self.temperature = temperature + self.sim_model_id = sim_model_id + self.sys_prompt = """ + You are an assistant for extract information from context and selection the possible answer from the selection provided. + You are given the extracted parts of a paper about solar chemistry and a question. Provide the extracted information and nothing else. 
+ """ + self.context_file_path = context_file_path + # self.data = data + self.input_file_path = input_file_path + + self.context_result = { + "generation_model": self.llm_id, + "similarity_model": self.sim_model_id, + "similarity_metric": "Cosine_Similarity", + "result": [] + } + login(self.hf_key) + self.get_text() + self.get_vector() + print("¡¡¡Vector Store Database is prepared!!!") + self.get_llm() + + def get_text(self): + title_list = ["Abstract", "Experimental", "Results and discussion"] + # with open(self.json_path, "rb") as f: + # data = json.load(f) + # print(self.input_file_path) + if self.input_file_path[-3:] == "pdf": + data = process_paper(self.input_file_path) + else: + with open(self.input_file_path, "rb") as f: + data = json.load(f) + # print("¡¡¡PDF file has been extracted!!!") + self.context = "" + for section in data: + if section["title"] in title_list: + self.context += section["title"] + self.context += "\n" + self.context += section["content"] + self.context += "\n" + + def get_llm(self): + if self.use_platform: + if self.llm_platform == "groq": + os.environ["GROQ_API_KEY"] = self.user_key + self.llm = ChatGroq(temperature=self.temperature, model_name=self.llm_id) + else: + raise ValueError('Unsupportted Platform') + else: + try: + bnb_config = transformers.BitsAndBytesConfig( + load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16 + ) + self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_id) + self.llm = transformers.AutoModelForCausalLM.from_pretrained( + self.llm_id, + torch_dtype=torch.bfloat16, + device_map="auto", + quantization_config=bnb_config + ) + self.terminators = [ + self.tokenizer.eos_token_id, + self.tokenizer.convert_tokens_to_ids("<|eot_id|>") + ] + except: + raise ValueError('Unsupportted Platform') + + def get_vector(self): + model_kwargs = {"device": "cpu"} + self.sim_model = HuggingFaceEmbeddings(model_name=self.sim_model_id, 
model_kwargs=model_kwargs) + text_splitter = RecursiveCharacterTextSplitter( + chunk_size=512, + chunk_overlap=32, + length_function=len + ) + chunks = text_splitter.split_text(text=self.context) + self.vector_store = FAISS.from_texts(chunks, embedding=self.sim_model, normalize_L2=True, distance_strategy="COSINE") + + def search(self, query, k): + embed_q = self.sim_model.embed_query(query) + self.context = self.vector_store.similarity_search_with_score_by_vector(embed_q, k) + + def format_prompt(self, query, k): + self.search(query, k) + prompt = self.sys_prompt + "\n" + "Question:" + prompt += query + prompt += "\n" + prompt += "Context:" + for i in range(k): + prompt += f"{self.context[i]}\n" + return prompt + + def generation(self, query_data): + res = "" + if self.use_platform: + for key, query in query_data.items(): + new_prompt = self.format_prompt(query, 5) + messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] + outputs = self.llm.invoke(messages) + response = outputs.content + temp_res = { + "question_category": key, + "query": query, + "generation": clean_gen(response), + "evidence": [] + } + for i in range(len(self.context)): + context = self.context[i][0].page_content + sim_score = float(self.context[i][1]) + temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) + self.context_result["result"].append(temp_res) + res += response + res += "\n" + self.result = clean_gen(res) + else: + for key, query in query_data.items(): + new_prompt = self.format_prompt(query, 5) + messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] + input_ids = self.tokenizer.apply_chat_template( + messages, + add_generation_prompt=True, + return_tensors="pt" + ) + outputs = self.llm.generate( + input_ids, + max_new_tokens=1024, + eos_token_id=self.terminators, + do_sample=True, + temperature=self.temperature, + top_p=0.9, + ) + response = 
self.tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True) + temp_res = { + "question_category": key, + "query": query, + "generation": clean_gen(response), + "evidence": [] + } + for i in range(len(self.context)): + context = self.context[i][0].page_content + sim_score = float(self.context[i][1]) + temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) + self.context_result["result"].append(temp_res) + res += response + res += "\n" + self.result = clean_gen(res) + + def save_context(self): + # print(self.context_result) + with open(self.context_file_path, "w") as f: + json.dump(self.context_result, f) + print(f"RAG context is saved at: {self.context_file_path}") + + + +def get_parser(): + parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") + parser.add_argument('--use_platform', type=lambda x:str2bool(x), default=True, help="the parameter of whether use online llm platform or use local model") + parser.add_argument('--user_key', default="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", help="if use platform, enter your key for platform", type=str) + parser.add_argument('--llm_id', default="llama-3.1-70b-versatile", help="the reference for the selected model, support grog model, huggingface llm or local model path ", type=str) + parser.add_argument('--hf_key', default="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", help="your huggingface token", type=str) + parser.add_argument('--llm_platform', default="groq", help='your platform choice', choices=["groq"], type=str) + parser.add_argument('--sim_model_id', default='Salesforce/SFR-Embedding-Mistral', help="encoder model for RAG", type=str) + parser.add_argument('--input_file_path', help='input data, extracted context from pdf', type=str) + parser.add_argument('--prompt_file_pdf', help='queries', type=str) + parser.add_argument('--context_file_path', help='save context file', type=str) + return parser + +def main(): + parser = get_parser() + 
args = parser.parse_args() + args_dict = vars(args) + prompt_file_pdf = args_dict["prompt_file_pdf"] + del args_dict["prompt_file_pdf"] + start_time = time.time() + solar = SolarQA(**args_dict) + print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) + temp_time = time.time() + with open(prompt_file_pdf, "rb") as f: + query_data = json.load(f) + solar.generation(query_data=query_data) + print(solar.result) + print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) + solar.save_context() + + +print("start") +main_process = Process(target=main) +main_process.start() +if main_process.is_alive(): + print("main process is finished") +else: + background_process.kill() diff --git a/CLI/CLI_old/code/data/context.json b/CLI/CLI_old/code/data/context.json new file mode 100644 index 0000000..8ed839d --- /dev/null +++ b/CLI/CLI_old/code/data/context.json @@ -0,0 +1,154 @@ +{ + "generation_model": "meta-llama/Llama-3.2-3B-Instruct", + "similarity_model": "Salesforce/SFR-Embedding-Mistral", + "similarity_metric": "Cosine_Similarity", + "result": [ + { + "question_category": "catalyst/co_catalyst", + "query": "\nPlease find the name of the catalyst and co-catalyst name from the provided context which describe an solar chemical experiment.\nPlease only generate the name of the catalyst and nothing else. Such as TiO2. ZrO2 or Ag, etc.\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nIf there are multiple results, please indicate them as XXX-XXX.\ncatalyst: XXX\nco_catalyst: XXX\n", + "generation": { + "catalyst": " TiO2", + "co_catalyst": " Cu" + }, + "evidence": [ + { + "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", + "similarity_score": 0.4707722067832947 + }, + { + "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", + "similarity_score": 0.49091827869415283 + }, + { + "pdf_reference": "2 catalyst.The highest methanol yield as well as quantum efficiency was achieved by 3CuO/TiO 2 indicating that this catalyst had the highest reactivity among all the prepared catalysts.Besides describing the catalyst reactivity, quantum efficiency value also figured the effectiveness of the reaction system.The effect of copper loading on methanol yields is shown in Fig. 4. 
The methanol yields increased with Cu loading, but then decreased when the Cu loading exceeded 3 wt.Evidently, more Cu loading can", + "similarity_score": 0.5129979848861694 + }, + { + "pdf_reference": "for Degussa P25 and 3CuO/TiO 2 catalysts.A mean value for E a of ca.26 and 12 kJ/mol was calculated for Degussa P25 and 3CuO/TiO 2 , respectively.These positive values of E a imply that the desorption of products is the rate limiting step in the photosynthetic formation of methanol.In addition, the apparent lower activation energy of 3CuO/TiO 2 catalyst may indicate a catalytic role of copper species as an active site to provide methanol, and hence improving the photoefficiency of TiO 2 photocatalyst.The", + "similarity_score": 0.5173196792602539 + }, + { + "pdf_reference": "Photocatalytic reduction of CO 2 by copper-doped titania catalysts has been investigated.The photocatalysts with various copper species (Cu 0 , Cu I , Cu II ) were prepared by an improved-impregnation method, where copper nitrate is doped into TiO 2 Degussa-P25.It is likely that copper present on the catalyst surface and the grain size of copper-titania catalysts is uniform, with crystallite size approximately 23 nm.The dispersion capacity of CuO in the vacant sites of TiO 2 is about 4.16 Cu 2 nm A2 (2.2", + "similarity_score": 0.5403363704681396 + } + ] + }, + { + "question_category": "light_source/lamp", + "query": "\nPlease find the category of Light_source and Lamp from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Light_source is Solar or Solar Simulator, Lamp is always Solar Simulator.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']\nPlease generating restrictively follow 
the format, and must start the generation as the format. Do not generate anything else.\nLight_source: XXX\nLamp: XXX\n", + "generation": { + "Light_source": " Solar Simulator", + "Lamp": " Solar Simulator" + }, + "evidence": [ + { + "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. Blank experiments were conducted to ensure that the product", + "similarity_score": 0.47584426403045654 + }, + { + "pdf_reference": "ranging from 0.4 to 1.0 lm.The EDX/Mapping analysis of Cu on the catalysts demonstrated that Cu uniformly dispersed on TiO 2 surface, and further increase of copper leads to the increase of the dot intensities of Cu.In addition, Table 1 lists the elemental wt of Cu estimated from EDX and AAS for comparison.The difference between wt of Cu obtained by EDX on different voltage (15 and 22 kV) and AAS indicated that most Cu was on the surface of the TiO 2 . 
The DRS patterns of TiO 2 P25 and all CuO-loaded", + "similarity_score": 0.6357707977294922 + }, + { + "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", + "similarity_score": 0.63763827085495 + }, + { + "pdf_reference": "was used to record the diffuse reflectance spectra (DRS) of the samples.The reflectance spectra of the samples were analyzed under ambient condition in the wavelength range of 340-700 nm.Copper concentration in the catalysts was measured by atomic absorption spectroscopy (AAS) using Chem.Tech Analytical 2000 spectrophotometer.The specific surface area of catalysts was measured by multipoint BET of N 2 adsorption in a Quantachrome Autosorb-6.A SEM 515 PHILIPS scanning electron microscope (SEM) equipped with", + "similarity_score": 0.6471814513206482 + }, + { + "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. 
The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", + "similarity_score": 0.6493161916732788 + } + ] + }, + { + "question_category": "reaction_medium", + "query": "\nPlease find the category of Reaction_medium from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Ph Value is mentioned in the experiment, Reaction_medium is always Liquid.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReaction_medium: ['Liquid', 'Gas']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReaction_medium: XXX\n", + "generation": { + "Reaction_medium": " Liquid" + }, + "evidence": [ + { + "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. 
Blank experiments were conducted to ensure that the product", + "similarity_score": 0.6080079674720764 + }, + { + "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", + "similarity_score": 0.6227010488510132 + }, + { + "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", + "similarity_score": 0.6527293920516968 + }, + { + "pdf_reference": "agitated the catalyst-suspended solution at the bottom to prevent sedimentation of the catalyst.The liquid sample was withdrawn every hour from a reactor column.The catalyst-suspended samples were centrifuged to separate catalyst particles from the liquid samples and then analyzed by a GC-FID of Shimadzu 8-ATC equipped with Porapak-Q column.", + "similarity_score": 0.6608579754829407 + }, + { + "pdf_reference": "the system.As a result of these sequential reactions (Eq.( 5)), the electron-hole recombination rate could be effectively reduced. 
To study the temperature dependence of the CO 2 photoreduction, the experiment was repeated at different temperatures in the range of 43-100 C.As can be seen from Fig. 5 the methanol yield increased for higher reaction temperatures.Saladin et al. 10 reported that the kinetic of CO 2 photoreduction can be modeled with the adsorption/desorption equilibrium of the various species", + "similarity_score": 0.6727200150489807 + } + ] + }, + { + "question_category": "reactor_type", + "query": "\nPlease find the category of Reactor_type from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReactor_type: ['Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReactor_type: XXX\n", + "generation": { + "Reactor_type": " Fixed-bed" + }, + "evidence": [ + { + "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. 
Blank experiments were conducted to ensure that the product", + "similarity_score": 0.5136107802391052 + }, + { + "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", + "similarity_score": 0.6327827572822571 + }, + { + "pdf_reference": "microscope (SEM) equipped with EDAX PV9900 PHILIPS energy dispersive X-ray (EDX) and Mapping was conducted at 15 and 22 kV to observe the morphology of catalysts, to perform semi-quantitative analysis, and to analyze distribution of Cu and Ti, respectively. The photocatalytic reactor configuration consists of a horizontal stainless steel vessel with an inner diameter and height of the vessel 140 and 50 mm, respectively.A top plate cover of the vessel was made of Pyrex and equipped with a gas bubbler and", + "similarity_score": 0.6621394157409668 + }, + { + "pdf_reference": "the system.As a result of these sequential reactions (Eq.( 5)), the electron-hole recombination rate could be effectively reduced. To study the temperature dependence of the CO 2 photoreduction, the experiment was repeated at different temperatures in the range of 43-100 C.As can be seen from Fig. 5 the methanol yield increased for higher reaction temperatures.Saladin et al. 
10 reported that the kinetic of CO 2 photoreduction can be modeled with the adsorption/desorption equilibrium of the various species", + "similarity_score": 0.6621772050857544 + }, + { + "pdf_reference": "agitated the catalyst-suspended solution at the bottom to prevent sedimentation of the catalyst.The liquid sample was withdrawn every hour from a reactor column.The catalyst-suspended samples were centrifuged to separate catalyst particles from the liquid samples and then analyzed by a GC-FID of Shimadzu 8-ATC equipped with Porapak-Q column.", + "similarity_score": 0.667502760887146 + } + ] + }, + { + "question_category": "operation_mode", + "query": "\nPlease find the category of Operation_mode from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nOperation_mode: ['Batch', 'Continuous', 'Batch/Continuous']\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nOperation_mode: XXX\n", + "generation": { + "Operation_mode": " Batch/Continuous" + }, + "evidence": [ + { + "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", + "similarity_score": 0.6429235935211182 + }, + { + "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. Blank experiments were conducted to ensure that the product", + "similarity_score": 0.6567009687423706 + }, + { + "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", + "similarity_score": 0.6839278340339661 + }, + { + "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. 
The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", + "similarity_score": 0.6891406774520874 + }, + { + "pdf_reference": "was used to record the diffuse reflectance spectra (DRS) of the samples.The reflectance spectra of the samples were analyzed under ambient condition in the wavelength range of 340-700 nm.Copper concentration in the catalysts was measured by atomic absorption spectroscopy (AAS) using Chem.Tech Analytical 2000 spectrophotometer.The specific surface area of catalysts was measured by multipoint BET of N 2 adsorption in a Quantachrome Autosorb-6.A SEM 515 PHILIPS scanning electron microscope (SEM) equipped with", + "similarity_score": 0.6904171109199524 + } + ] + } + ] +} \ No newline at end of file diff --git a/CLI/CLI_old/code/data/paper1_extraction.json b/CLI/CLI_old/code/data/paper1_extraction.json new file mode 100644 index 0000000..ccaf761 --- /dev/null +++ b/CLI/CLI_old/code/data/paper1_extraction.json @@ -0,0 +1,14 @@ +[ + { + "title": "Abstract", + "content": "Pure TiO 2 and various silver-enriched TiO 2 powders were prepared by the sol-gel process controlled in the reverse micellar environment.The catalysts were tested in CO 2 photocatalytic reduction and characterized by X-ray diffraction (XRD), nitrogen adsorption measurement and UV-vis.Methane and methanol were the main reduction products.The yield of methane and methanol increases when modifying the TiO 2 by silver incorporation is caused by two mechanisms: up to 5% of Ag in TiO 2 the Ag impurity band inside the TiO 2 bandgap decreases the absorption edge and increases so the electron-hole pair generation, above 5% of Ag in TiO 2 Ag metallic clusters are formed in TiO 2 crystals with Shottky barrier at the metal-semiconductor interface, which spatially separates electron and holes and increases their lifetime (decreases probability of their 
recombination)." + }, + { + "title": "Experimental", + "content": "Pure TiO 2 and various silver-enriched TiO 2 powders were prepared by the sol-gel process controlled in the reverse micellar environment.Pure TiO 2 was synthesized by the addition of Tita- The surface area and porous structure of prepared catalysts were evaluated from nitrogen adsorption isotherms by the BET and BJH methods, respectively.Nitrogen physical adsorption was performed on a volumetric apparatus ASAP2020 Micromeritics (USA).Before analyses the samples were dried at 105 \u2022 C for 24 h in vacuum (0.1 Pa). UV-vis diffuse reflectance spectra of samples were recorded using GBS CINTRA 303 spectrometer equipped with a diffuse reflectance attachment with a spectralon-coated integrating sphere against spectralon reference.The reflectances were recalculated to the absorption using the Schuster-Kubelka-Munk equation, where R \u221e is the diffuse reflectance from a semi-infinite layer.Absorption edge was estimated by extrapolating the decreasing portion of the spectrum to the abscissa at zero absorption.Ag-to-TiO 2 mass ratio in Ag/TiO 2 powders were determined by X-ray fluorescence (XRF) analysis using an ARL 9400 XP sequential WD-XRF spectrometer with the standard deviation lower than 5%. The purity (carbon content in weight %) of the powders was specified on an Elementar Vario EL III (Elementar).The detection limit of the apparatus was 0.1 wt.%. XRD powder diffraction patterns were obtained with the aid of a Seifert-FMP or a Panalytical-MPD laboratory diffractometer with Cu K\u2423 radiation in the conventional focusing Bragg-Brentano geometry in the diffraction angle range 2 = 10-90 \u2022 . The photocatalytic reduction of carbon dioxide was carried out in a homemade apparatus (Fig. 1). 
The photocatalytic reduction of carbon dioxide was carried out in a stirred batch annular reactor with a suspended catalyst illuminated by UV 8 W Hg lamp (254 nm).GC/FID/TCD was used for the analysis of gas and liquid reaction products.The details of the photocatalytic CO 2 reduction experiment and analytical methods were described in our previous publication [39].It is important to minimize the influence of transport phenomena during kinetic measurements.The elimination of CO 2 diffusion from the bulk of gas through the gas-liquid interface in a laboratory batch slurry reactor was accomplished by saturating the liquid with pure CO 2 before the reaction had been started [4,11].Catalyst loading of 1 g dm -3 was chosen to avoid concentration gradients in the bulk of stirred liquid with TiO 2 suspension due to the scattering effect of light caused by the high TiO 2 concentration [11,16,40,41].The determination of suitable volume of the liquid phase in our annular photoreactor to fulfil the requirement of perfect mixing has been published recently as 100 ml [42]. The accuracy of measurements was verified by series of repeated measurements.Average values were mentioned in this article.Maximal relative error was 5%." + }, + { + "title": "Results and discussion", + "content": "The basic textural properties of prepared titania and doped titania samples are summarised in Table 1 together with real Ag content determined by XRF.All samples possess a relatively high surface area; moreover, the positive effect of silver, which increases the surface area by about 15%, is clearly noticeable.It is obvious that the surface areas of Ag-doped TiO 2 samples varied only slightly and they are independent on the growth of Ag amount.The pore maxima of all samples are nearly the same without dependence on the Ag presence. The purity of all prepared samples was excellent, because the organic residual content was lower than 0.1 wt.%. 
XRD analysis confirmed the presence of the pure anatase crystallite structure for all samples.Ag was not detected in XRD pattern, probably Ag is situated in the bulk (inside the TiO 2 crystals) and/or Ag clusters are smaller than 0.3 nm.Fig. 2 shows the UV-vis spectra of Ag-TiO 2 catalysts.It is seen that (i) the maxima of the UV-vis band increased with the increasing Ag loading from 360 to 367 nm and (ii) the maximum absorption wavelength increased with increasing Ag loading, i.e. the absorption was shifted into the visible region for Ag-TiO 2 catalysts and the shift increased with the increasing amount of silver.The absorption edge (Fig. 2) values are given in Table 1. The effect of irradiation time on the formation of CO 2 photocatalytic reduction products was investigated over a period of 0-24 h on TiO 2 catalysts with different amount of Ag.Fig. 3 shows the evolution of all reaction products as the functions of the irradiation for a 7% Ag-TiO 2 catalyst.Two main products were determined: methane in the gas phase and methanol in the liquid phase.Hydrogen and low amounts of carbon monoxide were also detected.Other products such as formic acid, formaldehyde, ethane and ethylene could also be formed [2,16], but they were undetectable.The observed order of yields (mol/g cat ) order was: We assume on the basic of observed products the mechanism where the formation of carbon monoxide from \u2022 CO 2 -radical is the first step.Then both methane and methanol are formed in the parallel reactions.This mechanism was first suggested by Anpo [3]. Several authors [2,5,17] proposed the same mechanism with the exception that they predicted the formation of a carbon radical instead of surficial carbon. Comparisons of CH 4 , CH 3 OH, H 2 and CO yields over TiO 2 with different Ag content are shown in Figs.4-6A and B respectively.The yields of methane were negligible during the first 8 h of irradiation in all cases with the exception of catalyst 1 and 5% Ag-TiO 2 (Fig. 
4).A similar trend of yield-time dependency was observed; only the shape for the sample with 7% Ag-TiO 2 was slightly different.A substantial increase of methane yield was observed after 8 h of irradiation.The highest yield of methane was observed for the 7% Ag-TiO 2 catalyst.The yields of methanol were one order of magnitude lower than the yields of methane (Fig. 5).Data at 5 h were measured but were under the limit of determinableness (12 g/l i.e. 0.38 mol/gcatalyst).The steepest curve of methanol yield was measured for the 7% Ag-TiO 2 catalyst. The hydrogen yields were small during the first 13 h of irradiation in all cases (Fig. 6A).After 15 h the hydrogen yields started increasing slowly.The yields of carbon monoxide increased slowly during the reaction but all yields were near the limit of detection (Fig. 6B). The data from the longest reaction time span were chosen for the comparison because the yields of all products were the highest and the most accurate.The effect of silver doping on the yields of the two main products (methane and methanol) related to the weight of the catalyst is depicted in Fig. 7.The photoreactivity of Ag-TiO 2 increases with the increase in the Ag dopant concentration.Both yields are the highest for 7% Ag-TiO 2 .The effect of the Ag dopant on the CO 2 photocatalytic reduction has not been studied yet.However, an optimum amount of Ag corresponding to the highest product yields was found for other photocatalytic reactions [27,28,34,36]. The yields of the two main products (methane and methanol) expressed per 1 m 2 of catalyst surface (Fig. 8) show similar trends with the highest yields obtained for 7% Ag-TiO 2 .This indicates that the specific surface area is not the most decisive parameter in the CO 2 photocatalytic reduction. 
Doping with metals or metal oxides could cause two effects: (1) Firstly, it creates impurity band inside the TiO 2 bandgap, thus shifting the absorption edge to the visible region [29].The decrease of the absorption edge and the shift of absorption wavelength into the visible region in this paper are clearly visible from Table 1 and Fig. 2, for Ag content up to 5%.The decrease of the absorption edge increases the electron and hole pair photogeneration, which causes yields increase of CO 2 photoreduction.(2) Secondly, for Ag content above 5% silver atoms are not anymore randomly located in TiO 2 crystal, but forms probably metallic clusters inside TiO 2 crystal.These metal clusters could provoke a decrease in the electron-hole recombination rate [43,44].This hypothesis was based on the fact that the Fermi level of TiO 2 is higher than that of silver metals [45].Such phenomenon would be expected to lead to the formation of a Schottky barrier at metal-semiconductor contact region, which causes the charge separation, the decrease of electron-hole recombination rate and consequently increase of the TiO 2 photocatalytic activity.The above mentioned fact could be the reason for observed yields increases with increased Ag content in our experiments with CO 2 photocatalytic reduction and is the main reason of yields increase with 7% Ag-TiO 2 since the absorption edge has same value as 5% Ag-TiO 2 . Our results are in agreement with other works dealing with photocatalytic activity of Ag-doped TiO 2 .For example, the presence of Ag in TiO 2 increased the yield of dye degradation [27,28,31,32], photooxidation of 2-propanol [29], degradation Fig. 8. Dependence of the product yields related to specific surface area (after 24 h of irradiation) on the different catalysts. of 1,4-dichlorobenzene [33] and degradation of oxalic acid [37]." 
+ } +] \ No newline at end of file diff --git a/CLI/CLI_old/code/data/prompts.json b/CLI/CLI_old/code/data/prompts.json new file mode 100644 index 0000000..b6dc5eb --- /dev/null +++ b/CLI/CLI_old/code/data/prompts.json @@ -0,0 +1 @@ +{"catalyst/co_catalyst": "\nPlease find the name of the catalyst and co-catalyst name from the provided context which describe an solar chemical experiment.\nPlease only generate the name of the catalyst and nothing else. Such as TiO2. ZrO2 or Ag, etc.\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nIf there are multiple results, please indicate them as XXX-XXX.\ncatalyst: XXX\nco_catalyst: XXX\n", "light_source/lamp": "\nPlease find the category of Light_source and Lamp from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Light_source is Solar or Solar Simulator, Lamp is always Solar Simulator.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nLight_source: XXX\nLamp: XXX\n", "reaction_medium": "\nPlease find the category of Reaction_medium from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Ph Value is mentioned in the experiment, Reaction_medium is always Liquid.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReaction_medium: ['Liquid', 'Gas']\nPlease generating restrictively follow the format, and must start the generation as the format. 
def process_fulltext_document(service, file):
    """Send one PDF to the GROBID service and return the XML it produces.

    Uses the module-level ``client`` object.

    Args:
        service: Name of the GROBID service to call
            (e.g. ``"processFulltextDocument"``).
        file: Path of the PDF file to process.

    Returns:
        The XML string of the response when the HTTP status is 200,
        otherwise ``None``. Failures are logged, never raised.
    """
    try:
        rsp = client.process_pdf(
            service,
            file,
            generateIDs=True,
            consolidate_header=True,
            consolidate_citations=False,
            include_raw_citations=False,
            include_raw_affiliations=False,
            tei_coordinates=True,
            segment_sentences=True,
        )
        if rsp[1] == 200:  # rsp[1] is the HTTP status
            return rsp[2]  # rsp[2] is the XML string
        logging.error(f"Error: {rsp[1]}")
    except Exception as e:
        # Deliberate best-effort boundary: callers treat ``None`` as failure.
        logging.error(f"Exception during PDF processing: {e}")
    return None


def clean_text(text):
    """Clean and normalise *text*, removing unwanted characters.

    The text is NFKD-normalised and reduced to ASCII, line breaks and tabs
    become spaces, everything except alphanumerics and basic punctuation is
    dropped, and runs of whitespace collapse to a single space.
    """
    # Unicode normalisation; characters without an ASCII form are dropped.
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # remove any non-ASCII characters
    text = re.sub(r'[\u00b0\n\t\r]', ' ', text)  # remove specific characters
    # Keep only alphanumerics and basic punctuation.
    text = re.sub(r'[^A-Za-z0-9\s,.?!;:()\-\'\"/]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()  # collapse repeated whitespace
    return text


def extract_sections_from_xml(xml_content):
    """Extract the relevant sections from the XML returned by GROBID.

    Args:
        xml_content: XML document as a string.

    Returns:
        A list of ``{"title": ..., "content": ...}`` dicts for "Abstract",
        "Experimental", "Results and discussion" and "Conclusions", plus
        "Supporting Information" when that section has any content.
    """
    root = ET.fromstring(xml_content)
    ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

    def local_name(elem):
        """Return the element's tag without its ``{namespace}`` prefix."""
        return elem.tag.rsplit('}', 1)[-1]

    def extract_content_by_tag(tag_name):
        """Join the cleaned text of every TEI element named *tag_name*."""
        content = []
        path = f".//{{{ns['tei']}}}{tag_name}"
        for elem in root.findall(path):
            text = ''.join(elem.itertext())
            if text:
                content.append(clean_text(text.strip()))
        return " ".join(content)

    def extract_content_by_keywords(start_keywords, end_section):
        """Collect paragraph text between a start heading and an end heading.

        Capture starts at the first ``head`` whose text contains one of
        *start_keywords* (case-insensitive) and stops at the next ``head``
        containing *end_section*.
        """
        lowered_starts = [keyword.lower() for keyword in start_keywords]
        lowered_end = end_section.lower()
        content = []
        capture = False
        for elem in root.iter():
            name = local_name(elem)
            if name == "head" and elem.text:
                heading = elem.text.lower()
                if any(keyword in heading for keyword in lowered_starts):
                    capture = True
                elif capture and lowered_end in heading:
                    # Only an end heading seen *after* the start heading
                    # terminates the section; otherwise a section that
                    # follows its terminator would always come out empty.
                    break
            if capture and name == "p":
                text = ''.join(elem.itertext()).strip()
                if text:
                    content.append(clean_text(text))
        return " ".join(content)

    # Section extraction.
    sections = [
        {"title": "Abstract", "content": extract_content_by_tag("abstract")},
        {"title": "Experimental", "content": extract_content_by_keywords(
            ["Experimental", "Experimental studies", "Experiments", "Experimental methods", "Methods"],
            "Results and discussion")},
        {"title": "Results and discussion", "content": extract_content_by_keywords(
            ["Results and discussion", "Result and discussion", "Results"], "Conclusion")},
        {"title": "Conclusions", "content": extract_content_by_keywords(
            ["Conclusion", "Conclusions"], "Conclusion")},
    ]

    # Optional "Supporting Information" section.
    supporting_content = extract_content_by_keywords(
        ["Supporting Information", "Supporting"], "Conclusion")
    if supporting_content:
        sections.append({"title": "Supporting Information", "content": supporting_content})

    return sections


def run_process(cmd):
    """Run the shell command *cmd*, capturing its output.

    Args:
        cmd: Command line, interpreted by the shell.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.
    """
    # NOTE: shell=True executes *cmd* verbatim; only pass trusted strings.
    return subprocess.run(cmd, capture_output=True, shell=True)


def process_paper(pdf_file_path):
    """Process a PDF file and return a list with one JSON object per section.

    Uses the module-level ``service_name``.

    Args:
        pdf_file_path: Path of the PDF to process.

    Returns:
        The list produced by ``extract_sections_from_xml``, or a one-element
        list ``[{"error": message}]`` when processing fails.
    """
    try:
        # Process the PDF with GROBID to obtain the XML.
        xml_response = process_fulltext_document(service_name, pdf_file_path)
        if not xml_response:
            return [{"error": "Error processing the PDF."}]
        # Extract the relevant sections and return them as a list of objects.
        return extract_sections_from_xml(xml_response)
    except Exception as e:
        logging.error(f"Error processing paper: {e}")
        return [{"error": str(e)}]
args_dict["prompt_file_pdf"] +# grobid_path = args_dict["grobid_path"] +# global command +# command = f"cd ~; cd {grobid_path}; ./gradlew run" +# # DETACHED_PROCESS = 0x00000008 +# # subprocess.Popen(["bash", "run_grobid.sh"], creationflags=DETACHED_PROCESS) +# # subprocess.Popen(["bash", "run_grobid.sh"], close_fds=True) +# # subprocess.Popen(["bash", "run_grobid.sh"]) +# print("Grobid is running") +# global client +# client = GrobidClient(config_path="./setting/config.json") +# global service_name +# service_name = "processFulltextDocument" +# del args_dict["prompt_file_pdf"] +# del args_dict["grobid_path"] +# start_time = time.time() +# solar = SolarQA(**args_dict) +# print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) +# temp_time = time.time() +# with open(prompt_file_pdf, "rb") as f: +# query_data = json.load(f) +# solar.generation(query_data=query_data) +# print(solar.result) +# print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) +# solar.save_context() + +# pool = Pool(2) +# pool.apply_async(subprocess.Popen(["bash", "run_grobid.sh"]), (0,)) +# pool.apply_async(main(), (0,)) +# # main() + +# # Configurar el registro (logging) +# logging.basicConfig(level=logging.ERROR, format='%(asctime)s:%(levelname)s:%(message)s') + +# # GROBID client configuration +# client = GrobidClient(config_path="./setting/config.json") +# service_name = "processFulltextDocument" + + + +# print(type(process_paper("/home/jovyan/grob/test.pdf"))) \ No newline at end of file diff --git a/CLI/CLI_old/code/run_benchmark.py b/CLI/CLI_old/code/run_benchmark.py new file mode 100644 index 0000000..375a6c8 --- /dev/null +++ b/CLI/CLI_old/code/run_benchmark.py @@ -0,0 +1,247 @@ +import os +import time +from grobid_client.grobid_client import GrobidClient +import json +import argparse + +import torch +import transformers +from huggingface_hub import login +from langchain_groq import ChatGroq +from 
def get_context(context):
    """Return the ``page_content`` of every item in *context* as a list."""
    return [item.page_content for item in context]


def clean_gen(gen):
    """Parse the ``key: value`` lines of a model generation into a dict.

    Each line is split at its first colon, so values that themselves contain
    colons are kept intact. Lines without a colon are ignored. Keys and
    values are stripped of surrounding whitespace.
    """
    parsed = {}
    for line in gen.split("\n"):
        key, sep, value = line.partition(":")
        if sep:
            parsed[key.strip()] = value.strip()
    return parsed


def str2bool(v):
    """Interpret "yes", "true", "t" or "1" (case-insensitive) as ``True``."""
    if isinstance(v, bool):
        return v
    return v.lower() in ("yes", "true", "t", "1")


class SolarQA:
    """Retrieval-augmented question answering over one paper.

    On construction the object logs in to Hugging Face, loads the paper text,
    builds a FAISS vector store over its chunks and prepares the generation
    model (a Groq-hosted chat model or a locally loaded causal LM).
    ``generation`` then answers a dict of queries and records, per query, the
    parsed answer and the retrieved evidence in ``context_result``.
    """

    def __init__(self, use_platform, user_key, llm_id, hf_key, llm_platform="LOCAL", temperature=0.1,
                 sim_model_id="Salesforce/SFR-Embedding-Mistral", input_file_path="", context_file_path=""):
        """Store the configuration and build text, vector store and model.

        Args:
            use_platform: ``True`` to call the hosted platform, ``False`` to
                load ``llm_id`` locally.
            user_key: API key exported as ``GROQ_API_KEY`` in platform mode.
            llm_id: Identifier of the generation model.
            hf_key: Token passed to ``huggingface_hub.login``.
            llm_platform: Platform name; compared in lower case.
            temperature: Sampling temperature for generation.
            sim_model_id: Embedding model used for retrieval.
            input_file_path: PDF, or JSON list of sections, to read.
            context_file_path: Where ``save_context`` writes its JSON.
        """
        self.use_platform = use_platform
        self.llm_id = llm_id
        self.user_key = user_key
        self.hf_key = hf_key
        self.llm_platform = llm_platform.lower()
        self.temperature = temperature
        self.sim_model_id = sim_model_id
        self.sys_prompt = """
        You are an assistant for extract information from context and selection the possible answer from the selection provided.
        You are given the extracted parts of a paper about solar chemistry and a question. Provide the extracted information and nothing else.
        """
        self.context_file_path = context_file_path
        self.input_file_path = input_file_path

        self.context_result = {
            "generation_model": self.llm_id,
            "similarity_model": self.sim_model_id,
            "similarity_metric": "Cosine_Similarity",
            "result": []
        }
        login(self.hf_key)
        self.get_text()
        self.get_vector()
        print("¡¡¡Vector Store Database is prepared!!!")
        self.get_llm()

    def get_text(self):
        """Load the paper and concatenate its relevant sections.

        A path ending in ``.pdf`` is run through ``process_paper``; any other
        path is read as a JSON list of ``{"title", "content"}`` sections.
        The result is stored in ``self.context``.

        Raises:
            ValueError: If the section list contains an ``{"error": ...}``
                record, which is what ``process_paper`` returns on failure.
        """
        title_list = ["Abstract", "Experimental", "Results and discussion"]
        if self.input_file_path.lower().endswith(".pdf"):
            data = process_paper(self.input_file_path)
        else:
            with open(self.input_file_path, "rb") as f:
                data = json.load(f)
        parts = []
        for section in data:
            if "error" in section:
                raise ValueError(
                    f"Could not extract text from {self.input_file_path}: {section['error']}")
            if section.get("title") in title_list:
                parts.append(f"{section['title']}\n{section['content']}\n")
        self.context = "".join(parts)

    def get_llm(self):
        """Create ``self.llm`` for the configured backend.

        Raises:
            ValueError: If the platform is not supported or the local model
                cannot be loaded (the original error is chained as cause).
        """
        if self.use_platform:
            if self.llm_platform != "groq":
                raise ValueError('Unsupportted Platform')
            os.environ["GROQ_API_KEY"] = self.user_key
            self.llm = ChatGroq(temperature=self.temperature, model_name=self.llm_id)
            return
        try:
            bnb_config = transformers.BitsAndBytesConfig(
                load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16
            )
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_id)
            self.llm = transformers.AutoModelForCausalLM.from_pretrained(
                self.llm_id,
                torch_dtype=torch.bfloat16,
                device_map="auto",
                quantization_config=bnb_config
            )
            self.terminators = [
                self.tokenizer.eos_token_id,
                self.tokenizer.convert_tokens_to_ids("<|eot_id|>")
            ]
        except Exception as err:
            # Same exception type and message as before, but keep the cause
            # so a failed model load can actually be diagnosed.
            raise ValueError('Unsupportted Platform') from err

    def get_vector(self):
        """Split ``self.context`` into chunks and index them in FAISS."""
        model_kwargs = {"device": "cpu"}
        self.sim_model = HuggingFaceEmbeddings(model_name=self.sim_model_id, model_kwargs=model_kwargs)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=512,
            chunk_overlap=32,
            length_function=len
        )
        chunks = text_splitter.split_text(text=self.context)
        self.vector_store = FAISS.from_texts(chunks, embedding=self.sim_model, normalize_L2=True,
                                             distance_strategy="COSINE")

    def search(self, query, k):
        """Retrieve up to *k* scored chunks for *query* into ``self.context``.

        Note that this overwrites the paper text stored by ``get_text``.
        """
        embed_q = self.sim_model.embed_query(query)
        self.context = self.vector_store.similarity_search_with_score_by_vector(embed_q, k)

    def format_prompt(self, query, k):
        """Build the user prompt for *query* from the top-*k* retrieved chunks."""
        self.search(query, k)
        prompt = self.sys_prompt + "\n" + "Question:"
        prompt += query
        prompt += "\n"
        prompt += "Context:"
        # Iterate over what was actually retrieved: the store may return
        # fewer than k chunks, and indexing range(k) would then fail.
        # NOTE(review): each entry is formatted as the raw search result
        # (chunk and score), not just the chunk text - confirm intended.
        for retrieved in self.context:
            prompt += f"{retrieved}\n"
        return prompt

    def _ask_platform(self, messages):
        """Return the hosted model's reply text for *messages*."""
        return self.llm.invoke(messages).content

    def _ask_local(self, messages):
        """Return the locally loaded model's reply text for *messages*."""
        input_ids = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(self.llm.device)  # inputs must live on the model's device
        outputs = self.llm.generate(
            input_ids,
            max_new_tokens=1024,
            eos_token_id=self.terminators,
            do_sample=True,
            temperature=self.temperature,
            top_p=0.9,
        )
        # Decode only the newly generated tokens, not the echoed prompt.
        return self.tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)

    def generation(self, query_data):
        """Answer every query in *query_data* and record answers and evidence.

        Args:
            query_data: Mapping of question category to query text.

        Side effects:
            Appends one record per query to ``self.context_result["result"]``
            and stores the parsed answers of all queries in ``self.result``.
        """
        ask = self._ask_platform if self.use_platform else self._ask_local
        responses = []
        for key, query in query_data.items():
            new_prompt = self.format_prompt(query, 5)
            messages = [{"role": "system", "content": self.sys_prompt},
                        {"role": "user", "content": new_prompt}]
            response = ask(messages)
            evidence = [
                {"pdf_reference": doc.page_content, "similarity_score": float(score)}
                for doc, score in self.context
            ]
            self.context_result["result"].append({
                "question_category": key,
                "query": query,
                "generation": clean_gen(response),
                "evidence": evidence,
            })
            responses.append(response)
        self.result = clean_gen("".join(f"{response}\n" for response in responses))

    def save_context(self):
        """Write ``self.context_result`` as JSON to ``self.context_file_path``."""
        with open(self.context_file_path, "w") as f:
            json.dump(self.context_result, f)
        print(f"RAG context is saved at: {self.context_file_path}")
+ print(solar.result) + print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) + solar.save_context() + + + + +input_folder = "/home/jovyan/Solar/data/paper_all/" +output_folder = "/home/jovyan/Solar/CLI/context/" +run_bench(input_folder, output_folder) + + + \ No newline at end of file diff --git a/CLI/CLI_old/code/run_grobid.sh b/CLI/CLI_old/code/run_grobid.sh new file mode 100644 index 0000000..5e329cc --- /dev/null +++ b/CLI/CLI_old/code/run_grobid.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# # download GROBID if directory does not exist +declare -r GROBID_VERSION="0.8.0" # or change to current stable version 0.6.2 + +# if [ ! -d grobid-${GROBID_VERSION} ]; then +# wget https://github.com/kermitt2/grobid/archive/${GROBID_VERSION}.zip +# unzip "${GROBID_VERSION}.zip" +# rm "${GROBID_VERSION}.zip" +# fi + + +# run GROBID +cd ~ +cd /home/jovyan/Solar/CLI/grobid-0.8.0 +./gradlew run \ No newline at end of file diff --git a/CLI/CLI_old/code/setting/config.json b/CLI/CLI_old/code/setting/config.json new file mode 100644 index 0000000..789c3fa --- /dev/null +++ b/CLI/CLI_old/code/setting/config.json @@ -0,0 +1,7 @@ +{ + "grobid_server": "http://localhost:8070", + "batch_size": 1000, + "sleep_time": 5, + "timeout": 60, + "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] +} diff --git a/CLI/README.md b/CLI/README.md index 5674fe8..1ff6457 100644 --- a/CLI/README.md +++ b/CLI/README.md @@ -1,51 +1,42 @@ -# Solar-QA-CLI -This repository contains the command-line tool for [solar-qa pipepine](https://github.com/oeg-upm/solar-qa-eval) +# SolarRAG CLI -## Requirement +> **_Solar CLI:_** This CLI contains SolarFactRAG and SolarNaiveRAG pipeline for [solar-qa pipeline](https://github.com/oeg-upm/solar-qa-eval) -### Requirement for Paper Extraction +## Install -- gorbid-client-python - -### Requirement for Generation - -- torch -- accelerate -- tokenizers -- bitsandbytes -- huggingface_hub -- sentence-transformer -- faiss-cpu 
-- langchain -- langchain_groq -- langchain-community +#### 1. install all python packages +```console +pip install ollama networkx leidenalg cdlib python-igraph python-dotenv langchain huggingface_hub langchain-ollama==0.1.0 langchain-community==0.2.19 sentence-transformers==2.7.0 grobid-client-python==0.0.3 +``` +#### 2. install ollama -### Usage +For more details about ollama, please visit the [official ollama website](https://ollama.com/) -1. Install [GROBID](https://grobid.readthedocs.io/en/latest/) in your computing device -2. Start running GROBID server in your local device by ```console -./gradlew run +curl -fsSL https://ollama.com/install.sh | sh ``` -3. With the running GROBID server, to run the entire command-line tool by running the `cli.py` in the directory `.../CLI/code/cli.py`. The command line to run the `cli.py` is given below: -```json -"--use_platform": the parameter of whether use online platform or local model for the llm(generation model). option = ["True", "False"] -"--user_key": the user key or token for the online platform, type="str" -"--llm_id": the reference id for the llm(generation model), type="str" -"--hf_key": your huggingface token, this is required to use the similarity model, type="str" -"--llm_platform": indication of which llm online platform you wish to use, option=["grob"] -"--sim_model_id": the reference id for the similarity model, type="str" -"--input_file_path": the directory for the pdf fild that you wish to analysis, type="str", file type=.pdf -"--prompt_file_path": the directory for the json file that contains your prompt, file type=.json -"--context_file_path": the directory for where you wish to save the output file, file type=.json -``` -*Example not use online platform:* + +#### 3.
install grobid client + +For this step, please refer to the [official GROBID installation guide](https://grobid.readthedocs.io/en/latest/Install-Grobid/) + +## Run Solar CLI + +After installing all prerequisite libraries and software, you can simply run the CLI with: + ```console -python cli.py --use_platform False --hf_key YOUR_HF_KEY --llm_id meta-llama/Llama-3.2-3B-Instruct --sim_model_id Salesforce/SFR-Embedding-Mistral --pdf_file_path .../test.pdf --prompt_file .../prompts.json --context_file_path .../context.json +python SolarRAG.py --llm_id llama3.2:3b --embedding_id nomic-embed-text --input_file_path XXX/paper_2.pdf --prompt_file XXX/prompt.json --context_file_path XXX/test.json --rag_type fact ``` -*Example use online platform:* -```console -python cli.py --use_platform True --user_key YOUR_USER_KEY --hf_key YOUR_HF_KEY --llm_id llama-3.1-70b-versatile --llm_platform grob --sim_model_id Salesforce/SFR-Embedding-Mistral --pdf_file_path .../test.pdf --prompt_file .../prompts.json --context_file_path .../context.json -``` \ No newline at end of file + +Here is a table that describes the parameters used to run the FactRAG CLI + +| Parameter | Definition | DataType | Example | +| -------- | ------- | ------- | ------- | +| llm_id | the ollama LLM model to use | String | [llama3.2:3b](https://ollama.com/library/llama3.2) | +| embedding_id | the ollama embedding model to use | String | [nomic-embed-text](https://ollama.com/library/nomic-embed-text) | +| input_file_path | path to the input data (a pdf file or an extracted json file) | String | ../paper_1.pdf | +| prompt_file | path to the prompt json file | String | ../code/prompt.json | +| context_file_path | path where the output json file is saved | String | ../FactRAG/context.json | +| rag_type | the type of RAG pipeline, one of ['fact', 'naive'] | String | fact | \ No newline at end of file diff --git a/CLI/Result_Example/SolarFactRAG_Example.json b/CLI/Result_Example/SolarFactRAG_Example.json
new file mode 100644 index 0000000..6eac6b7 --- /dev/null +++ b/CLI/Result_Example/SolarFactRAG_Example.json @@ -0,0 +1 @@ +{"paper_title": "Photocatalytic Reduction of Greenhouse Gas CO2 to Fuel", "DOI": "10.1007/s10563-009-9065-9", "generation_model": "llama3.2:3b", "similarity_model": "nomic-embed-text", "similarity_metric": "Cosine_Similarity", "rag_type": "fact", "result": [{"question_category": "catalyst", "query": "What is the chemical name of the catalyst used in the experiment?", "generation": "###catalyst:\nTTIP (Titanium(IV) Isopropoxide).", "RAG_source": "generated_facts", "selected_answer": {"catalyst": "\nTTIP (Titanium(IV) Isopropoxide)."}, "evidences": [{"similairty_score": 0.6745214462280273, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. 
TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.6460967063903809, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed.", "generated_facts": "Facts:\n1. Hydrocarbons are produced by Methane.\n2. Hydrocarbons are produced by Ethylene.\n3. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Methane.\n4. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Ethylene.\n5. The Dye-sensitized Cu-Fe/P25 photocatalyst harnesses Light energy.\n6. The Dye-sensitized Cu-Fe/P25 photocatalyst is enhanced by Sunlight.\n7. CO2 (carbon dioxide) is photoreduced by In situ IR spectroscopy.\n8. Methane is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst.\n9. Ethylene is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst."}, {"similairty_score": 0.6339464783668518, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 
2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.6232354044914246, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced", "generated_facts": "Facts: \n1. The optical-fiber photoreactor is designed to be coated with Ag/TiO\u2082 photocatalyst-coated fibers.\n2. The optical-fiber photoreactor comprises nearly 120 Ag/TiO\u2082 photocatalyst-coated fibers.\n3. The photocatalyst-coated fibers are coated with Anatase phase.\n4. All photocatalysts contain the Anatase phase.\n5. Increasing UV light intensity increases methanol yield.\n6. The UV light intensity is set to 10 W/cm\u00b2.\n7. 
The Ag/TiO\u2082 photocatalyst is used under a light intensity of 10 W/cm\u00b2.\n8. The TiO\u2082-SiO\u2082 mixed oxide is doped with Cu metal and Fe metal.\n9. Photocatalysts exhibit substantial differences in hydrocarbon production.\n10. Hydrocarbon production is affected by product selectivity.\n11. Product selectivity is influenced by Cu metal and Fe metal.\n12. The TiO\u2082-SiO\u2082 mixed oxide doped with Cu and Fe metals produces methane.\n13. The TiO\u2082-SiO\u2082 mixed oxide doped with Cu and Fe metals also produces ethylene."}, {"similairty_score": 0.6223254799842834, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. 
The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}]}, {"question_category": "co_catalyst", "query": "What is the chemical name of the co_catalyst used in the experiment?", "generation": "###co_catalyst:\nacac", "RAG_source": "generated_facts", "selected_answer": {"co_catalyst": "\nacac"}, "evidences": [{"similairty_score": 0.65687096118927, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. 
Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.6408281922340393, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed.", "generated_facts": "Facts:\n1. Hydrocarbons are produced by Methane.\n2. Hydrocarbons are produced by Ethylene.\n3. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Methane.\n4. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Ethylene.\n5. The Dye-sensitized Cu-Fe/P25 photocatalyst harnesses Light energy.\n6. The Dye-sensitized Cu-Fe/P25 photocatalyst is enhanced by Sunlight.\n7. CO2 (carbon dioxide) is photoreduced by In situ IR spectroscopy.\n8. Methane is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst.\n9. 
Ethylene is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst."}, {"similairty_score": 0.6391236186027527, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was", "generated_facts": "Facts: \n1. Thermocouple is placed at Reactor.\n2. Reactor was purged by CO2 gas.\n3. Reactor was purged by Distilled water.\n4. CO2 gas bubbled through Distilled water.\n5. Space velocity of CO2 gas and H2O vapor.\n6. Space velocity of CO2 gas and H2O vapor was maintained at nearly 0.72 h-1.\n7. Photo reactions were carried out at steady state by Reactor.\n8. Photo reactions lasted for 6-24 h.\n9. Catalysts were repeated two or three times on Reactor.\n10. Catalysts deactivation was found to be negligible.\n11. Outlet gases were analyzed by GC equipment.\n12. GC equipment was equipped with FID and Porapak Q column.\n13. TiO2 underwent thermal-hydrolysis.\n14. CO2 was photoreduced on Thermal-hydrolyzed TiO2."}, {"similairty_score": 0.6241745948791504, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 
2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.6187817454338074, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. 
The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}]}, {"question_category": "Light_source", "query": "From the choices provided, which light source is used in the experiment?", "generation": "###Light_source: 'Solar'", "RAG_source": "generated_facts", "selected_answer": {"Light_source": " 'Solar'"}, "evidences": [{"similairty_score": 0.6323468089103699, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. 
Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. Temperature controllers control temperature."}, {"similairty_score": 0.6295710802078247, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar", "generated_facts": "Facts:\n1. Light is transmitted by a fiber-supported TiO2 film.\n2. A fiber-supported TiO2 film is inserted into fibers.\n3. Fibers are supported on circular plates.\n4. Circular plates have a diameter of 5.0 cm.\n5. An OFPR module is irradiated with artificial light or concentrated natural sunlight.\n6. Artificial light is supplied by the Exfo Acticure 4,000.\n7. The Exfo Acticure 4,000 uses an appropriate color filter.\n8. Light intensity can be measured with a Lumen meter (Exfo).\n9. Solar irradiates an OFPR module.\n10. Concentrated natural sunlight enters through a quartz window.\n\nInferred Facts:\n11. TiO2 is used in a fiber-supported TiO2 film.\n12. An OFPR processes and repairs fibers.\n13. 
A quartz window allows concentrated natural sunlight to enter."}, {"similairty_score": 0.629106342792511, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. 
Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.6268292665481567, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.6255613565444946, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. 
The TiO2 film absorbs light with wavelength below 380 nm."}]}, {"question_category": "Lamp", "query": "From the choices provided, which lamp is used in the experiment?", "generation": "###Lamp: 'LED'", "RAG_source": "generated_facts", "selected_answer": {"Lamp": " 'LED'"}, "evidences": [{"similairty_score": 0.6355500221252441, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.6323181986808777, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}, {"similairty_score": 0.6160007119178772, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. 
CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.6134966611862183, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. 
The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.6098795533180237, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar", "generated_facts": "Facts:\n1. Light is transmitted by a fiber-supported TiO2 film.\n2. A fiber-supported TiO2 film is inserted into fibers.\n3. Fibers are supported on circular plates.\n4. Circular plates have a diameter of 5.0 cm.\n5. An OFPR module is irradiated with artificial light or concentrated natural sunlight.\n6. Artificial light is supplied by the Exfo Acticure 4,000.\n7. The Exfo Acticure 4,000 uses an appropriate color filter.\n8. Light intensity can be measured with a Lumen meter (Exfo).\n9. 
Solar irradiates an OFPR module.\n10. Concentrated natural sunlight enters through a quartz window.\n\nInferred Facts:\n11. TiO2 is used in a fiber-supported TiO2 film.\n12. An OFPR processes and repairs fibers.\n13. A quartz window allows concentrated natural sunlight to enter."}]}, {"question_category": "Reaction_medium", "query": "From the choices provided, which reaction medium is used in the experiment?", "generation": "###Reaction_medium: Liquid", "RAG_source": "generated_facts", "selected_answer": {"Reaction_medium": " Liquid"}, "evidences": [{"similairty_score": 0.6295759081840515, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. 
The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.5933018922805786, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. Temperature controllers control temperature."}, {"similairty_score": 0.5931262969970703, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 
2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.5924503207206726, "pdf_reference": "The TiO 2 solutions to be on optical fibers were prepared by the thermal hydrolysis method.Titanium (IV) butoxide and polyethylene glycol (PEG, molecular weight of 20,000, Merck, Darmstadt, Germany) were added to a 0.1 M nitric acid solution.The volume ratio of titanium butoxide to HNO 3 was 1:6, and the weight of PEG was 50 of that of TiO 2 .The appearance of pure TiO 2 solution was milky and slightly transparent.PEG was added to prevent cracking during the drying and calcination of the film.Furthermore, it tends to increase the viscosity of the solution as well as the uniformity TiO 2", "generated_facts": "Facts:\n1. Titanium (IV) butoxide has component TiO2.\n2. TiO2 will contain TiO2 solution.\n3. Polyethylene glycol (PEG) has molecular weight 20,000.\n4. Merck is obtained from PEG.\n5. Nitric acid solution is added to TiO2 with a volume ratio of 1:6 to Titanium (IV) butoxide.\n6. TiO2 solution has concentration of nitric acid in M.\n7. Thermal hydrolysis method involves hydrolysis reaction.\n8. TiO2 solutions are prepared by thermal hydrolysis method.\n9. 
Film formation involves drying and calcination of TiO2 solution.\n10. Drying is part of film formation process for TiO2 solution.\n11. Calcination is part of film formation process for TiO2 solution.\n12. Optical fibers will contain TiO2 solution.\n13. Titanium (IV) butoxide is added to 0.1 M nitric acid solution."}, {"similairty_score": 0.5891863703727722, "pdf_reference": "fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding photocatalyst into the 3 mM dye solution for 24 h.Subsequently, dye-adsorbed photocatalyst was rinsed with ethanol (99.5) to obtain a monolayer of dye on the photocatalyst surface.The resulting dye-adsorbed photocatalyst was finally put in an oven at 80 C for 30 min to remove the residue solvent on the surface of the photocatalyst. An optical-fiber photoreactor (OFPR) was designed and assembled to transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig.", "generated_facts": "Facts:\n1. Fibers are obtained by dip-coating method.\n2. Dye-adsorbed photocatalyst is obtained by dipping into 3 mM dye solution for 24 h.\n3. Dye-adsorbed photocatalist is rinsed with ethanol (99.5%).\n4. Dye-adsorbed photocatalist is put in an oven at 80 C for 30 min to remove residue solvent on the surface of the photocatalyst.\n5. Optical-fiber photoreactor (OFPR) is designed and assembled to transmit light to TiO 2 film.\n6. Photocatalyst is dipped into 3 mM dye solution for 24 h to obtain dye-adsorbed photocatalyst.\n7. Dye-adsorbed photocatalist is put in an oven at 80 C for 30 min to remove residue solvent on the surface of the photocatalyst (same as relationship 4).\n8. 
TiO 2 film transmits light from one side of the OFPR module to optical-fiber photoreactor (OFPR)."}]}, {"question_category": "Reactor_type", "query": "From the choices provided, which reactor type is used in the experiment?", "generation": "###Reactor_type: Fluidised-bed", "RAG_source": "generated_facts", "selected_answer": {"Reactor_type": " Fluidised-bed"}, "evidences": [{"similairty_score": 0.6250825524330139, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.5985174775123596, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was", "generated_facts": "Facts: \n1. Thermocouple is placed at Reactor.\n2. Reactor was purged by CO2 gas.\n3. Reactor was purged by Distilled water.\n4. CO2 gas bubbled through Distilled water.\n5. Space velocity of CO2 gas and H2O vapor.\n6. Space velocity of CO2 gas and H2O vapor was maintained at nearly 0.72 h-1.\n7. Photo reactions were carried out at steady state by Reactor.\n8. Photo reactions lasted for 6-24 h.\n9. Catalysts were repeated two or three times on Reactor.\n10. Catalysts deactivation was found to be negligible.\n11. Outlet gases were analyzed by GC equipment.\n12. GC equipment was equipped with FID and Porapak Q column.\n13. TiO2 underwent thermal-hydrolysis.\n14. 
CO2 was photoreduced on Thermal-hydrolyzed TiO2."}, {"similairty_score": 0.5907428860664368, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.5826471447944641, "pdf_reference": "equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was studied using diffusive reflectance infrared Fourier transform (DRIFT) under UV irradiation.Zero-grade air, high-purity He and ultra-purity CO 2 (99.999 v), from Air Products (USA) were used in the IR system.In order to reduce water interference, air and He were passed through a moisture trapper before entering the photoreactor. A high-temperature chamber (HVC, Harrick HVC-DRP-1) made of 316 stainless steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were", "generated_facts": "Here are the simple fact sentences constructed by combining the given relationships:\n\nFacts:\n1. The Fused Inertial Detector is equipped with a Porapak Q column.\n2. Titanium Dioxide was studied on Carbon Dioxide using DRIFT spectroscopy.\n3. Air Products supplied ultra-purity CO2 (99.999 v) for use in the experiment.\n4. Helium passed through a moisture trapper before entering the photoreactor.\n5. Zero-grade air passed through a moisture trapper before entering the photoreactor.\n6. High-purity helium was used in the experiment.\n7. The HVC-DRP-1 is made of 316 stainless steel.\n8. The photoreactor is used as a high-temperature chamber, Harrick model.\n9. The IR system is used in the photoreduction of CO2 on TiO2.\n10. 
Air was passed through a moisture trapper before entering the photoreactor.\n\nNote that I did not include relationship 13 as it is an implicit repetition of relationship 7 and does not provide any new information."}, {"similairty_score": 0.5698617696762085, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}]}, {"question_category": "Operation_mode", "query": "From the choices provided, which oepration mode is used in the experiment?", "generation": "###Operation_mode: Batch/Continuous", "RAG_source": "generated_facts", "selected_answer": {"Operation_mode": " Batch/Continuous"}, "evidences": [{"similairty_score": 0.5681795477867126, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.5528874397277832, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.5436047315597534, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}, {"similairty_score": 0.5275892615318298, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. 
The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.524968683719635, "pdf_reference": "Experimental", "generated_facts": "Let's construct some simple fact sentences using the provided relationships.\n\nSince we're working with a single entity, I'll use the first option: (Experimental, is, ). This implies that Experimental is a thing or concept that exists on its own.\n\nHere are a few examples of fact sentences:\n\n1. The experimental process is ongoing.\n2. Experimental methods have been used in various fields.\n3. The term \"experimental\" refers to a type of scientific inquiry.\n\nNow, let's try the second option: (Experimental, related to, ). This implies that Experimental has some connection or association with something else.\n\nHere are a few examples of fact sentences:\n\n1. Experimental results often require statistical analysis.\n2. The experimental approach is distinct from traditional methods.\n3. 
Experimental design plays a crucial role in scientific research.\n\nPlease let me know if you'd like to explore more relationships or provide additional context!"}]}]} \ No newline at end of file diff --git a/CLI/Result_Example/SolarNavieRAG_Example.json b/CLI/Result_Example/SolarNavieRAG_Example.json new file mode 100644 index 0000000..7551849 --- /dev/null +++ b/CLI/Result_Example/SolarNavieRAG_Example.json @@ -0,0 +1 @@ +{"paper_title": "Photocatalytic Reduction of Greenhouse Gas CO2 to Fuel", "DOI": "10.1007/s10563-009-9065-9", "generation_model": "llama3.2:3b", "similarity_model": "nomic-embed-text", "similarity_metric": "Cosine_Similarity", "rag_type": "naive", "result": [{"question_category": "catalyst", "query": "What is the chemical name of the catalyst used in the experiment?", "generation": "###catalyst: Titanium Dioxide (TiO2)", "RAG_source": "chunks", "selected_answer": {"catalyst": " Titanium Dioxide (TiO2)"}, "evidences": [{"similairty_score": 0.6278509497642517, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.6265729069709778, "pdf_reference": "Article_Title\nPhotocatalytic Reduction of Greenhouse Gas CO2 to Fuel\nAbstract"}, {"similairty_score": 0.6251746416091919, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted 
deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}, {"similairty_score": 0.6206226944923401, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}, {"similairty_score": 0.6131582260131836, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed."}]}, {"question_category": "co_catalyst", "query": "What is the chemical name of the co_catalyst used in the experiment?", "generation": "###co_catalyst: Cu-Fe", "RAG_source": "chunks", "selected_answer": {"co_catalyst": " Cu-Fe"}, "evidences": [{"similairty_score": 0.6458245515823364, "pdf_reference": "Article_Title\nPhotocatalytic Reduction of Greenhouse Gas CO2 to Fuel\nAbstract"}, {"similairty_score": 0.6371890902519226, "pdf_reference": "Sun is 
the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.626099705696106, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed."}, {"similairty_score": 0.6174551248550415, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was"}, {"similairty_score": 0.6073813438415527, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}]}, {"question_category": "Light_source", "query": "From the choices provided, which light source is used in the experiment?", "generation": "###Light_source: UV", "RAG_source": "chunks", "selected_answer": {"Light_source": " UV"}, "evidences": [{"similairty_score": 0.6839399337768555, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.6214293837547302, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The 
photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.6177785396575928, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.6095446944236755, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.6055726408958435, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 
3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar"}]}, {"question_category": "Lamp", "query": "From the choices provided, which lamp is used in the experiment?", "generation": "###Lamp: 'Mercury-Xenon'", "RAG_source": "chunks", "selected_answer": {"Lamp": " 'Mercury-Xenon'"}, "evidences": [{"similairty_score": 0.6403747797012329, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.6216245889663696, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on 
the sample cup.A temperature controller, which"}, {"similairty_score": 0.601848304271698, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.59084552526474, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5811028480529785, "pdf_reference": "Experimental"}]}, {"question_category": "Reaction_medium", "query": "From the choices provided, which reaction medium is used in the experiment?", "generation": "###Reaction_medium: Liquid", "RAG_source": "chunks", "selected_answer": {"Reaction_medium": " Liquid"}, "evidences": [{"similairty_score": 0.6020915508270264, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory 
during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5890271067619324, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5843930244445801, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5805729031562805, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a 
colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}, {"similairty_score": 0.5772931575775146, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}]}, {"question_category": "Reactor_type", "query": "From the choices provided, which reactor type is used in the experiment?", "generation": "###Reactor_type: Optical Fiber", "RAG_source": "chunks", "selected_answer": {"Reactor_type": " Optical Fiber"}, "evidences": [{"similairty_score": 0.6050081253051758, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5929175615310669, "pdf_reference": "color filter.The light 
intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5833992958068848, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5747902989387512, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was"}, {"similairty_score": 0.5674387216567993, "pdf_reference": "Experimental"}]}, {"question_category": "Operation_mode", "query": "From the choices provided, which oepration mode is used in the experiment?", "generation": "###Operation_mode: 'Continuous'", "RAG_source": "chunks", "selected_answer": {"Operation_mode": " 'Continuous'"}, "evidences": [{"similairty_score": 0.6241655349731445, "pdf_reference": "Experimental"}, {"similairty_score": 0.5862252116203308, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5714736580848694, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5229113101959229, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and 
heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.49779558181762695, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}]}]} \ No newline at end of file diff --git a/CLI/Result_Spec.md b/CLI/Result_Spec.md index 808defb..0b0b001 100644 --- a/CLI/Result_Spec.md +++ b/CLI/Result_Spec.md @@ -1,87 +1,78 @@ -## CLI Result Specification - -### Generation Model -This item contains the large lanugage model (llm) reference id. - -*Example:* - -```json -"generation_model": "meta-llama/Llama-3.2-3B-Instruct" -``` - - -### Similarity Model -This item contains the similarity model reference id (the model is used in RAG searching stage). 
- -*Example:* - -```json -"similarity_model": "Salesforce/SFR-Embedding-Mistral" -``` - - -### Similarity Metric -This item contains the calculation metric used in the RAG searching stage - -*Example:* - -```json -"similarity_metric": "Cosine_Similarity" -``` - - -### Result -This item contains all relevant generation results, which includes: - -| Property | Mandatory? | Expected Value | Definition | -| :----- | :---- | :---- | :---- | -| **question_category** | Yes | String | This item refers to what the query is about, five possible choices are given below | -| **query** | Yes | String | This item contains the prompt used for the llm generation | -| **generation** | Yes | Dictionary | This item contains the generation result from the llm model, formatted as question_category: answer in a dictionary, as given in the table below | -| **evidence** | Yes | Dictionary | This item contains the evidence to support the RAG algorithm, which contains **pdf_refercence** and **similiarity_score** | -| **pdf_reference** | Yes | String | This item contains the original text extracted by the similarity model | -| **similiarity_score** | Yes | Float | This item contains the similarity score that are calcuated between quesry embedding and pdf_reference embedding, the calculation metric is the similarity metric. | - -*generation format:* - -Generation contains the result from llm's generation, the result only have one category of the item/items, with the corresponding expected value/values. -The choices for **individual category** and **expected value** are given below. - -```json -"generation":{ - "individual category": "excepted value", - ... 
-} -``` - -*Choices for question_category and expected value, as the choices for generation:* - -| Category | Expected Value |Definition | -| :----- | :---- | :---- | -| **catalyst/co_catalyst** | The catalyst and co_catalyst used in the experiment |The query is about the catalyst condition | -| **light_source/lamp** | **light_source**: 'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator'
**lamp**: 'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator' | The query is about the light usage condition about the experiment | -| **reaction_medium** | 'Liquid', 'Gas' | The query is about the reaction medium used in the experiment | -| **reactor_type** | The query is about the type of the reactor used in the experiment | 'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed' | -| **operation_mode** | 'Batch', 'Continuous', 'Batch/Continuous' | The query is about how the operation is conducted | - - +# SolarRAG-CLI Result Specification + +## Result Specification Table + +Here is the table that describes the saved output json file: + +| Category | Definition | DataType | Example | +| -------- | ------- | ------- | ------- | +| paper_title | The title of the paper | String | Effect of silver doping on the TiO2 for photocatalytic reduction of CO2 | +| DOI | The DOI of the paper (extracted from the paper pdf file) | String | 10.1016/j.apcatb.2010.02.030 | +| generation_model | The ollama model id for llm generation | String | llama3.2:3b | +| similarity_model | The ollama model id for text embedding | String | nomic-embed-text | +| similarity_metric | The metric for calculating the similarity between embeddings | String | Cosine_Similarity | +| rag_type | the type of rag pipeline, range=['fact', 'naive'] | String | fact | +| result | The list of generation results from the llm | List | Details given below | + +The details for `result` part of the saved output json file: + +| Category | Definition | DataType | Example | Range(if applicable) | +| -------- | ------- | ------- | ------- | ------- | +| question_category | The category of the question for the llm | String | Light_source | [catalyst, co_catalyst, Light_source, Lamp, Reaction_medium, Reactor_type, Operation_mode] | +| query | The query for the llm | String | What is the Light_source used in the experiment?
| Not applicable | +| generation | The generated answer from the llm | String | ###Light_source: UV | Not applicable | +| RAG_source | The information source provided for the RAG pipeline | String | generated_facts | Not applicable | +| selected_answer | The corresponding answer from the selection of choices | String | UV | Details are given below | +| evidences | The list of evidences for the RAG pipeline | list | Details given below | Not applicable | + +The range for `selected_answer` with corresponding `question_category`: +| question_category | Range(if applicable) | +| -------- | ------- | +| catalyst | Not applicable | +| co_catalyst | Not applicable | +| Light_source | 'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator', 'Do not Know' | +| Lamp | 'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator', 'Do not Know' | +| Reaction_medium | 'Liquid', 'Gas', 'Do not Know' | +| Reactor_type | 'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed', 'Do not Know' | +| Operation_mode | 'Batch', 'Continuous', 'Batch/Continuous', 'Do not Know' | + +The details for `evidences` part of the `result` from the output json: + +| Category | Definition | DataType | Example | +| -------- | ------- | ------- | ------- | +| similairty_score | The similarity score between the query and the corresponding text provided to the embedding model | Float | 0.6205 | +| pdf_reference | The original text that is extracted from the paper | String | in the conventional focusing... | +| generated_facts | The generated facts based on the pdf_reference | String | Facts: 1. A homemade apparatus is used... | + +> **_NOTE:_** `generated_facts` exists only when the type of RAG pipeline is `fact` RAG.
+ +## Example of the SolarRAG-CLI result *Example:* ```json -"question_category": "catalyst/co_catalyst", -"query": "\nPlease find the name of the catalyst...", -"generation": { - "catalyst": "TiO2", - "co_catalyst": "Cu" -} -"evidence": [ - { - "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", - "similarity_score": 0.4707722067832947 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.49091827869415283 - }, - ... 
-] \ No newline at end of file + "paper_title": "Effect of silver doping on the TiO2 for photocatalytic reduction of CO2", + "DOI": "10.1016/j.apcatb.2010.02.030", + "generation_model": "llama3.2:3b", + "similarity_model": "nomic-embed-text", + "similarity_metric": "Cosine_Similarity", + "rag_type": "fact", + "result": [ + { + "question_category": "catalyst", + "query": "What is the chemical name of the catalyst used in the experiment", + "generation": "###catalyst: Titanium dioxide (TiO2)", + "RAG_source": "generated_facts", + "selected_answer": { + "catalyst": " Titanium dioxide (TiO2)" + }, + "evidences": [ + { + "similairty_score": 0.6646425724029541, + "pdf_reference": "methods were described in our previous publication 39.It is important to minimize the influence of transport phenomena during kinetic measurements.The elimination of CO 2 diffusion from the bulk of gas through the gas-liquid interface in a laboratory batch slurry reactor was accomplished by saturating the liquid with pure CO 2 before the reaction had been started 4,11.Catalyst loading of 1 g dm -3 was chosen to avoid concentration gradients in the bulk of stirred liquid with TiO 2 suspension due to the scattering effect of light caused by the high TiO 2 concentration 11,16,40,41.The", + "generated_facts": "Facts:\n1. The methods described in publication 39 were used for kinetic measurements.\n2. Kinetic measurements should minimize influence.\n3. CO2 diffusion from the bulk of gas through the gas-liquid interface was accomplished by saturating the liquid with pure CO2.\n4. Saturating the liquid with pure CO2 resulted in a high concentration of TiO2 suspension due to scattering effects of light.\n5. The scattering effect of light caused concentration gradients in the reactor.\n6. Concentration gradients in the reactor were avoided by choosing an optimal catalyst loading.\n7. An optimal catalyst loading was used to eliminate CO2 diffusion from the bulk of gas.\n8. 
Eliminating CO2 diffusion from the bulk of gas resulted in a laboratory batch slurry reactor being used for reaction.\n9. A laboratory batch slurry reactor was used to accomplish elimination of CO2 diffusion.\n10. The elimination of CO2 diffusion was achieved by using a TiO2 suspension that scattered light." + }, + ... + ] + } + ] +``` \ No newline at end of file diff --git a/CLI/code/FactsRAG.py b/CLI/code/FactsRAG.py new file mode 100644 index 0000000..c0cd9dd --- /dev/null +++ b/CLI/code/FactsRAG.py @@ -0,0 +1,280 @@ +from dotenv import load_dotenv +from cdlib import algorithms +import networkx as nx +import os +import json +import argparse +from collections import OrderedDict +from process import * + + +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.prompts import PromptTemplate +from langchain.retrievers import ContextualCompressionRetriever +from langchain.docstore.document import Document +from langchain_ollama import OllamaEmbeddings +from langchain_ollama import ChatOllama +from sentence_transformers import util + + +def get_text(data): + title_list = ["Article_Title", "Abstract", "Experimental", "Results and discussion"] + context = "" + context_list = [] + for section in data: + if section["title"] in title_list: + # self.context_list.append(f"{section["title"]}: {section["content"]}") + context += section["title"] + context += "\n" + context += section["content"] + context += "\n" + return context + + +def get_title(data): + for section in data: + if section["title"] == "Article_Title": + return section["content"] + else: + pass + return "None Given" + + +def get_doi(data): + for section in data: + if section["title"] == "Doi": + return section["content"] + else: + pass + return "None Given" + + +def clean_response(gen, category): + print(gen) + res = {} + s, e = gen.split(":") + res[category] = e + print(res) + return res + + +class SolarFact: + def __init__(self, llm_id, embedding_id, input_file_path=str(), 
context_file_path=str()): + self.llm_id = llm_id + self.embedding_id = embedding_id + self.input_file_path = input_file_path + self.context_file_path = context_file_path + self._get_llm() + self._get_documents() + self.context_result = { + "paper_title": self.paper_title, + "DOI": self.doi, + "generation_model": self.llm_id, + "similarity_model": self.embedding_id, + "similarity_metric": "Cosine_Similarity", + "result": [] + } + self.chunks, self.entities, self.relations, self.facts = self._prepare_pipeline() + + def _get_llm(self): + self.llm = ChatOllama(model=self.llm_id, temperature=0) + self.embeddings = OllamaEmbeddings(model=self.embedding_id) + + def _get_documents(self): + if self.input_file_path[-3:] == "pdf": + # print(1) + data = process_paper(self.input_file_path) + else: + with open(self.input_file_path, "rb") as f: + data = json.load(f) + self.paper_title = get_title(data) + self.doi = get_doi(data) + self.documents = get_text(data) + + def _split_documents_into_chunks(self, chunk_size=600, overlap_size=100): + documents = [Document(page_content=self.documents)] + text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size) + chunks = text_splitter.split_documents(documents) + return chunks + + def _extract_entities_from_chunks(self, chunks): + entities = {} + title_list = ['Article Title', "Abstract", "Experimental", "Results and discussion"] + print(f"------Start extracting entities from chunks------") + print(f"------Total chunk count: {len(chunks)}------") + for index, chunk in enumerate(chunks): + if chunk in title_list: + pass + else: + response = self.llm.invoke( + [ + {"role": "system", "content": "Extract all the entities from the following text."}, + {"role": "user", "content": chunk.page_content} + ] + ) + entities_for_chunks = response.content + entities[index] = entities_for_chunks + print(f"------Entities extraction is done------") + return entities + + def 
_extract_relationships_from_chunks_and_entities(self, chunks, entities): + relations = {} + title_list = ['Article Title', "Abstract", "Experimental", "Results and discussion"] + print(f"------Start extracting entities from chunks------") + print(f"------Total chunk count: {len(chunks)}------") + for index, chunk in enumerate(chunks): + if chunk in title_list: + pass + else: + response = self.llm.invoke( + [ + {"role": "system", "content": "Extract all the relationship from the following context and provided entities in the format of triples, (subject, predicate, object)"}, + {"role": "user", "content": f"Context: {chunk.page_content}, Entities: {entities[index]}"} + ] + ) + relation = response.content + relations[index] = relation + print(f"------Relationships extraction is done------") + return relations + + def _generate_facts_from_relations(self, chunks, relations): + facts = {} + print(f"------Start generating factual sentences------") + for index, chunk in enumerate(chunks): + response = self.llm.invoke( + [ + {"role": "system", "content": "Construct simple fact sentences by combining the following relationships after the \"Facts:\" word."}, + {"role": "user", "content": f" Relations: {relations[index]}"} + ] + ) + fact = response.content + facts[index] = fact + print(f"------Facts generation is done------") + return facts + + def _cal_fact_cosine_similairty(self, facts, prompt, category): + if isinstance(facts, list): + facts = {index: value for index, value in enumerate(facts)} + sim_dict = {} + for key, fact in facts.items(): + fact_embed = self.embeddings.embed_query(fact) + query_embed = self.embeddings.embed_query(prompt) + cosine_similarity = util.cos_sim(fact_embed, query_embed) + sim_dict[key] = cosine_similarity[0].detach().item() + sorted_sim_dict = dict(sorted(sim_dict.items(), key=lambda item: item[1], reverse=True)) + return sorted_sim_dict + + def _generate_final_answer(self, sort_sim_dict, k, text, prompt, category, selection=None): + indexes 
= list(sort_sim_dict.keys())[:k] + context = "" + for ind in indexes: + context += text[ind] + if selection is None: + final_response = self.llm.invoke( + [ + {"role": "system", "content": f"Answer the following query based on the provided summary of facts. Please indicate the answer following the structure of \"###{category}:\" YOUR ANSWER"}, + {"role": "user", "content": f"Facts: {context} Query : {prompt}"} + ] + ) + + else: + final_response = self.llm.invoke( + [ + {"role": "system", "content": f"Answer the following query by selecting only one of the choices based on the provided summary of facts. Please indicate the answer following the structure of \"###{category}:\" YOUR ANSWER"}, + {"role": "user", "content": f"Query: {prompt} Choices: {selection} Facts: {context}"} + ] + ) + return indexes, final_response.content + + def _prepare_pipeline(self): + chunks = self._split_documents_into_chunks() + + entities = self._extract_entities_from_chunks(chunks) + + relations = self._extract_relationships_from_chunks_and_entities(chunks, entities) + + facts = self._generate_facts_from_relations(chunks, relations) + + return chunks, entities, relations, facts + + + def graphrag_pipeline(self, k, prompt, category): + print("---------------------running factrag pipeline---------------------") + ## Prepare data + #self.chunks, self.entities, self.relations, self.facts + + ## Get choices + if category in ["catalyst", "co_catalyst"]: + selection = None + elif category == "Light_source": + selection = "'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator', 'Do not Know'" + elif category == "Lamp": + selection = "'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator', 'Do not Know'" + elif category == "Reaction_medium": + selection = "'Liquid', 'Gas', 'Do not Know'" + elif category == "Reactor_type": + selection = "'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed', 'Do not 
Know'" + elif category == 'Operation_mode': + selection = "'Batch', 'Continuous', 'Batch/Continuous', 'Do not Know'" + + ## Run Pipeline + sim_dict = self._cal_fact_cosine_similairty(self.facts, prompt, category) + + indexes, final_response = self._generate_final_answer(sim_dict, k, self.facts, prompt, category, selection) + + evidences = [] + + for index in indexes: + evidence = { + "similairty_score": sim_dict[index], + "pdf_reference": self.chunks[index].page_content, + "generated_facts": self.facts[index] + } + evidences.append(evidence) + temp = { + "question_category": category, + "query": prompt, + "generation": final_response, + "RAG_source": "generated_facts", + "selected_answer": clean_response(final_response, category), + "evidences": evidences + } + return temp + + +def get_parser(): + parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") + parser.add_argument('--llm_id', type=str, default=True, help="the parameter of which LLM model from ollama to use") + parser.add_argument('--embedding_id', type=str, default=True, help="the parameter of which embedding model from ollama to use") + parser.add_argument('--input_file_path', type=str, default=True, help="path for input data, pdf file or extracted json file") + parser.add_argument('--prompt_file', help='queries', type=str) + parser.add_argument('--context_file_path', type=str, default=True, help="save context file") + return parser + + +def main(): + parser = get_parser() + args = parser.parse_args() + args_dict = vars(args) + prompt_file = args_dict["prompt_file"] + del args_dict["prompt_file"] + factrag = SolarFact(**args_dict) + context_result = { + "paper_title": factrag.paper_title, + "DOI": factrag.doi, + "generation_model": factrag.llm_id, + "similarity_model": factrag.embedding_id, + "similarity_metric": "Cosine_Similarity", + "result": [] + } + with open(prompt_file, "rb") as f: + query_data = json.load(f) + for key, value in query_data.items(): + temp = 
factrag.graphrag_pipeline(5, value, key) + context_result["result"].append(temp) + # print(context_result) + with open(factrag.context_file_path, "w") as f: + json.dump(context_result, f) + + +main() \ No newline at end of file diff --git a/CLI/code/process.py b/CLI/code/process.py index 36e3655..d258961 100644 --- a/CLI/code/process.py +++ b/CLI/code/process.py @@ -1,21 +1,16 @@ from grobid_client.grobid_client import GrobidClient -import subprocess -from multiprocessing import Pool import xml.etree.ElementTree as ET import json import pandas as pd import re import unicodedata import logging -import threading -# from cli import * -# from cli import * # Configurar el registro (logging) logging.basicConfig(level=logging.ERROR, format='%(asctime)s:%(levelname)s:%(message)s') # GROBID client configuration -client = GrobidClient(config_path="./setting/config.json") +client = GrobidClient(config_path="./settings/config.json") service_name = "processFulltextDocument" def process_fulltext_document(service, file): @@ -46,7 +41,6 @@ def clean_text(text): return text def extract_sections_from_xml(xml_content): - """ Extrae secciones relevantes del contenido XML devuelto por Grobid. 
""" root = ET.fromstring(xml_content) ns = {'tei': 'http://www.tei-c.org/ns/1.0'} @@ -82,12 +76,25 @@ def extract_content_by_keywords(start_keywords, end_section): content.append(clean_text(text)) return " ".join(content) - # Extracción de secciones + # Nueva función para extraer el DOI + def extract_doi(): + """Extrae el DOI del documento usando su etiqueta específica.""" + doi_element = root.find(".//tei:idno[@type='DOI']", ns) + if doi_element is not None: + return clean_text(doi_element.text.strip()) + return None + + # Extraer el DOI + doi = extract_doi() + + # Construcción de las secciones sections = [ + {"title": "Doi", "content": doi if doi else "Doi not found"}, # Incluir el DOI como primera sección + {"title": "Article_Title", "content": title}, # Agregar el título como primera sección {"title": "Abstract", "content": extract_content_by_tag("abstract")}, {"title": "Experimental", "content": extract_content_by_keywords( ["Experimental", "Experimental studies", "Experiments", "Experimental methods", "Methods"], "Results and discussion")}, - {"title": "Results and discussion", "content": extract_content_by_keywords( + {"title": "Results_and_discussion", "content": extract_content_by_keywords( ["Results and discussion", "Result and discussion", "Results"], "Conclusion")}, {"title": "Conclusions", "content": extract_content_by_keywords( ["Conclusion", "Conclusions"], "Conclusion")}, @@ -97,21 +104,19 @@ def extract_content_by_keywords(start_keywords, end_section): supporting_content = extract_content_by_keywords( ["Supporting Information", "Supporting"], "Conclusion") if supporting_content: - sections.append({"title": "Supporting Information", "content": supporting_content}) + sections.append({"title": "Supporting_Information", "content": supporting_content}) return sections -def run_process(cmd): - subprocess.run(command, capture_output=True, shell=True) def process_paper(pdf_file_path): - """ Procesa un archivo PDF y devuelve una lista con objetos JSON por 
cada sección. """ + try: # Procesar el PDF con Grobid para obtener el XML xml_response = process_fulltext_document(service_name, pdf_file_path) if xml_response: - # Extraer secciones relevantes del XML y devolverlas como lista de objetos + # Extraer secciones relevantes del XML sections = extract_sections_from_xml(xml_response) return sections # Devolver el resultado como una lista de objetos JSON else: @@ -119,66 +124,7 @@ def process_paper(pdf_file_path): except Exception as e: logging.error(f"Error processing paper: {e}") return [{"error": str(e)}] - -# def get_parser(): -# parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") -# parser.add_argument('--use_platform', type=lambda x:str2bool(x), default=True, help="the parameter of whether use online llm platform or use local model") -# parser.add_argument('--user_key', default="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", help="if use platform, enter your key for platform", type=str) -# parser.add_argument('--llm_id', default="llama-3.1-70b-versatile", help="the reference for the selected model, support grog model, huggingface llm or local model path ", type=str) -# parser.add_argument('--hf_key', default="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", help="your huggingface token", type=str) -# parser.add_argument('--llm_platform', default="groq", help='your platform choice', choices=["groq"], type=str) -# parser.add_argument('--sim_model_id', default='Salesforce/SFR-Embedding-Mistral', help="encoder model for RAG", type=str) -# parser.add_argument('--pdf_file_path', help='input data, extracted context from pdf', type=str) -# parser.add_argument('--prompt_file_pdf', help='queries', type=str) -# parser.add_argument('--context_file_path', help='save context file', type=str) -# parser.add_argument('--grobid_path', help='the directoray of your grobid location', type=str) -# return parser - -# def main(): -# parser = get_parser() -# args = parser.parse_args() -# args_dict = vars(args) -# 
print(args_dict) -# prompt_file_pdf = args_dict["prompt_file_pdf"] -# grobid_path = args_dict["grobid_path"] -# global command -# command = f"cd ~; cd {grobid_path}; ./gradlew run" -# # DETACHED_PROCESS = 0x00000008 -# # subprocess.Popen(["bash", "run_grobid.sh"], creationflags=DETACHED_PROCESS) -# # subprocess.Popen(["bash", "run_grobid.sh"], close_fds=True) -# # subprocess.Popen(["bash", "run_grobid.sh"]) -# print("Grobid is running") -# global client -# client = GrobidClient(config_path="./setting/config.json") -# global service_name -# service_name = "processFulltextDocument" -# del args_dict["prompt_file_pdf"] -# del args_dict["grobid_path"] -# start_time = time.time() -# solar = SolarQA(**args_dict) -# print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) -# temp_time = time.time() -# with open(prompt_file_pdf, "rb") as f: -# query_data = json.load(f) -# solar.generation(query_data=query_data) -# print(solar.result) -# print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) -# solar.save_context() - -# pool = Pool(2) -# pool.apply_async(subprocess.Popen(["bash", "run_grobid.sh"]), (0,)) -# pool.apply_async(main(), (0,)) -# # main() - -# # Configurar el registro (logging) -# logging.basicConfig(level=logging.ERROR, format='%(asctime)s:%(levelname)s:%(message)s') - -# # GROBID client configuration -# client = GrobidClient(config_path="./setting/config.json") -# service_name = "processFulltextDocument" - -# print(type(process_paper("/home/jovyan/grob/test.pdf"))) \ No newline at end of file diff --git a/CLI/code/prompt.json b/CLI/code/prompt.json new file mode 100644 index 0000000..219922f --- /dev/null +++ b/CLI/code/prompt.json @@ -0,0 +1,9 @@ +{ + "catalyst": "What is the chemical name of the catalyst used in the experiment?", + "co_catalyst": "What is the chemical name of the co_catalyst used in the experiment?", + "Light_source": "From the choices provided, which 
light source is used in the experiment?", + "Lamp": "From the choices provided, which lamp is used in the experiment?", + "Reaction_medium": "From the choices provided, which reaction medium is used in the experiment?", + "Reactor_type": "From the choices provided, which reactor type is used in the experiment?", + "Operation_mode": "From the choices provided, which oepration mode is used in the experiment?" +} \ No newline at end of file diff --git a/CLI/code/run_factrag.sh b/CLI/code/run_factrag.sh new file mode 100644 index 0000000..4b838f9 --- /dev/null +++ b/CLI/code/run_factrag.sh @@ -0,0 +1,29 @@ +# !/bin/bash + +llm_id='llama3.2:3b' +embedding_id='nomic-embed-text' +prompt_file='/home/jovyan/GraphRAG/self_developed/prompt.json' + +# Define the target directory +input_directory="/home/jovyan/GraphRAG/self_developed/10_bench" +output_directory="/home/jovyan/GraphRAG/self_developed/context/" + + +if [ ! -d "$input_directory" ]; then + exit 1 +fi + +for file in "$input_directory"/*; do + if [ -f "$file" ]; then + var=$(echo "$file" | cut -d "_" -f 4) + ind=$(echo "$var" | cut -d "." 
-f 1) + output_file="${output_directory}result_${ind}.json" + if [ -f "$output_file" ]; then + echo "file exist" + else + python FactsRAG.py --llm_id "$llm_id" --embedding_id "$embedding_id" --input_file_path "$file" --prompt_file "$prompt_file" --context_file_path "$output_file" + echo $output_file + fi + fi +echo "DONE" +done \ No newline at end of file diff --git a/CLI/code/settings/config.json b/CLI/code/settings/config.json new file mode 100644 index 0000000..789c3fa --- /dev/null +++ b/CLI/code/settings/config.json @@ -0,0 +1,7 @@ +{ + "grobid_server": "http://localhost:8070", + "batch_size": 1000, + "sleep_time": 5, + "timeout": 60, + "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] +} From 4973f17306ddc1d4b9f63a2ef27aa5065ebb74f9 Mon Sep 17 00:00:00 2001 From: Clark Wang <107419732+OEG-Clark@users.noreply.github.com> Date: Wed, 11 Dec 2024 20:52:23 +0100 Subject: [PATCH 2/4] Removed CLO_old From 5e172404dfc18675383fe44af7f7bfd4c53f866e Mon Sep 17 00:00:00 2001 From: Clark Wang <107419732+OEG-Clark@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:04:01 +0100 Subject: [PATCH 3/4] Delete CLI directory --- CLI/CLI_old/README.md | 59 -- CLI/CLI_old/Result_Spec.md | 87 --- .../code/__pycache__/process.cpython-310.pyc | Bin 3826 -> 0 bytes CLI/CLI_old/code/cli.py | 278 -------- CLI/CLI_old/code/data/context.json | 154 ----- CLI/CLI_old/code/data/paper1_extraction.json | 14 - CLI/CLI_old/code/data/prompts.json | 1 - CLI/CLI_old/code/process.py | 184 ----- CLI/CLI_old/code/run_benchmark.py | 247 ------- CLI/CLI_old/code/run_grobid.sh | 16 - CLI/CLI_old/code/setting/config.json | 7 - CLI/README.md | 42 -- CLI/Result_Example/SolarFactRAG_Example.json | 1 - CLI/Result_Example/SolarNavieRAG_Example.json | 1 - CLI/Result_Spec.md | 78 --- CLI/code/FactsRAG.py | 280 -------- CLI/code/GraphRAG/graphrag.ipynb | 626 ------------------ CLI/code/GraphRAG/graphrag_prompt.json | 7 - CLI/code/GraphRAG/run_benchmark.sh | 36 - 
CLI/code/__pycache__/process.cpython-310.pyc | Bin 3826 -> 0 bytes CLI/code/cli.py | 278 -------- CLI/code/data/context.json | 154 ----- CLI/code/data/paper1_extraction.json | 14 - CLI/code/data/prompts.json | 1 - CLI/code/process.py | 130 ---- CLI/code/prompt.json | 9 - CLI/code/run_benchmark.py | 247 ------- CLI/code/run_factrag.sh | 29 - CLI/code/run_grobid.sh | 16 - CLI/code/setting/config.json | 7 - CLI/code/settings/config.json | 7 - 31 files changed, 3010 deletions(-) delete mode 100644 CLI/CLI_old/README.md delete mode 100644 CLI/CLI_old/Result_Spec.md delete mode 100644 CLI/CLI_old/code/__pycache__/process.cpython-310.pyc delete mode 100644 CLI/CLI_old/code/cli.py delete mode 100644 CLI/CLI_old/code/data/context.json delete mode 100644 CLI/CLI_old/code/data/paper1_extraction.json delete mode 100644 CLI/CLI_old/code/data/prompts.json delete mode 100644 CLI/CLI_old/code/process.py delete mode 100644 CLI/CLI_old/code/run_benchmark.py delete mode 100644 CLI/CLI_old/code/run_grobid.sh delete mode 100644 CLI/CLI_old/code/setting/config.json delete mode 100644 CLI/README.md delete mode 100644 CLI/Result_Example/SolarFactRAG_Example.json delete mode 100644 CLI/Result_Example/SolarNavieRAG_Example.json delete mode 100644 CLI/Result_Spec.md delete mode 100644 CLI/code/FactsRAG.py delete mode 100644 CLI/code/GraphRAG/graphrag.ipynb delete mode 100644 CLI/code/GraphRAG/graphrag_prompt.json delete mode 100644 CLI/code/GraphRAG/run_benchmark.sh delete mode 100644 CLI/code/__pycache__/process.cpython-310.pyc delete mode 100644 CLI/code/cli.py delete mode 100644 CLI/code/data/context.json delete mode 100644 CLI/code/data/paper1_extraction.json delete mode 100644 CLI/code/data/prompts.json delete mode 100644 CLI/code/process.py delete mode 100644 CLI/code/prompt.json delete mode 100644 CLI/code/run_benchmark.py delete mode 100644 CLI/code/run_factrag.sh delete mode 100644 CLI/code/run_grobid.sh delete mode 100644 CLI/code/setting/config.json delete mode 100644 
CLI/code/settings/config.json diff --git a/CLI/CLI_old/README.md b/CLI/CLI_old/README.md deleted file mode 100644 index 765046f..0000000 --- a/CLI/CLI_old/README.md +++ /dev/null @@ -1,59 +0,0 @@ -# Solar-QA-CLI -This repository contains the command-line tool for [solar-qa pipepine](https://github.com/oeg-upm/solar-qa-eval) - -## Requirement - -### Requirement for Paper Extraction - -- [Grobid](https://grobid.readthedocs.io/en/latest/) - -### Requirement for Generation - -All required libraries are detailed in requirement.txt. - -```console -pip install -r requirement.txt -``` - - -### Usage - -1. Install [GROBID](https://grobid.readthedocs.io/en/latest/) in your computing device -2. Start running GROBID server in your local device following the instruction from [GROBID WEBSITE](https://grobid.readthedocs.io/en/latest/). -3. With the running GROBID server, upload the configuration of GROBID in sub-folder `.../CLI/setting/config.json`. The file is given below: - -```json -{ - "grobid_server": "http://localhost:8070", - "batch_size": 1000, - "sleep_time": 5, - "timeout": 60, - "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] -} -``` - - -4. Run the entire command-line tool by running the `cli.py` in the directory `.../CLI/code/cli.py`. The command line to run the `cli.py` is given below: -```json -{ - "--use_platform": the parameter of whether use online platform or local model for the llm(generation model). 
option = ["True", "False"] - "--user_key": the user key or token for the online platform, type="str" - "--llm_id": the reference id for the llm(generation model), type="str" - "--hf_key": your huggingface token, this is required to use the similarity model, type="str" - "--llm_platform": indication of which llm online platform you wish to use, option=["grob"] - "--sim_model_id": the reference id for the similarity model, type="str" - "--input_file_path": the directory for the pdf fild that you wish to analysis, type="str", file type=.pdf - "--prompt_file_path": the directory for the json file that contains your prompt, file type=.json - "--context_file_path": the directory for where you wish to save the output file, file type=.json -} -``` -*Example not use online platform:* -```console -python cli.py --use_platform False --hf_key YOUR_HF_KEY --llm_id meta-llama/Llama-3.2-3B-Instruct --sim_model_id Salesforce/SFR-Embedding-Mistral --pdf_file_path .../test.pdf --prompt_file .../prompts.json --context_file_path .../context.json -``` -*Example use online platform:* -```console -python cli.py --use_platform True --user_key YOUR_USER_KEY --hf_key YOUR_HF_KEY --llm_id llama-3.1-70b-versatile --llm_platform grob --sim_model_id Salesforce/SFR-Embedding-Mistral --pdf_file_path .../test.pdf --prompt_file .../prompts.json --context_file_path .../context.json -``` - -5. Result format is given at `Result_Spec.md` \ No newline at end of file diff --git a/CLI/CLI_old/Result_Spec.md b/CLI/CLI_old/Result_Spec.md deleted file mode 100644 index 808defb..0000000 --- a/CLI/CLI_old/Result_Spec.md +++ /dev/null @@ -1,87 +0,0 @@ -## CLI Result Specification - -### Generation Model -This item contains the large lanugage model (llm) reference id. - -*Example:* - -```json -"generation_model": "meta-llama/Llama-3.2-3B-Instruct" -``` - - -### Similarity Model -This item contains the similarity model reference id (the model is used in RAG searching stage). 
- -*Example:* - -```json -"similarity_model": "Salesforce/SFR-Embedding-Mistral" -``` - - -### Similarity Metric -This item contains the calculation metric used in the RAG searching stage - -*Example:* - -```json -"similarity_metric": "Cosine_Similarity" -``` - - -### Result -This item contains all relevant generation results, which includes: - -| Property | Mandatory? | Expected Value | Definition | -| :----- | :---- | :---- | :---- | -| **question_category** | Yes | String | This item refers to what the query is about, five possible choices are given below | -| **query** | Yes | String | This item contains the prompt used for the llm generation | -| **generation** | Yes | Dictionary | This item contains the generation result from the llm model, formatted as question_category: answer in a dictionary, as given in the table below | -| **evidence** | Yes | Dictionary | This item contains the evidence to support the RAG algorithm, which contains **pdf_refercence** and **similiarity_score** | -| **pdf_reference** | Yes | String | This item contains the original text extracted by the similarity model | -| **similiarity_score** | Yes | Float | This item contains the similarity score that are calcuated between quesry embedding and pdf_reference embedding, the calculation metric is the similarity metric. | - -*generation format:* - -Generation contains the result from llm's generation, the result only have one category of the item/items, with the corresponding expected value/values. -The choices for **individual category** and **expected value** are given below. - -```json -"generation":{ - "individual category": "excepted value", - ... 
-} -``` - -*Choices for question_category and expected value, as the choices for generation:* - -| Category | Expected Value |Definition | -| :----- | :---- | :---- | -| **catalyst/co_catalyst** | The catalyst and co_catalyst used in the experiment |The query is about the catalyst condition | -| **light_source/lamp** | **light_source**: 'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator'
**lamp**: 'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator' | The query is about the light usage condition about the experiment | -| **reaction_medium** | 'Liquid', 'Gas' | The query is about the reaction medium used in the experiment | -| **reactor_type** | The query is about the type of the reactor used in the experiment | 'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed' | -| **operation_mode** | 'Batch', 'Continuous', 'Batch/Continuous' | The query is about how the operation is conducted | - - - -*Example:* -```json -"question_category": "catalyst/co_catalyst", -"query": "\nPlease find the name of the catalyst...", -"generation": { - "catalyst": "TiO2", - "co_catalyst": "Cu" -} -"evidence": [ - { - "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", - "similarity_score": 0.4707722067832947 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.49091827869415283 - }, - ... 
-] \ No newline at end of file diff --git a/CLI/CLI_old/code/__pycache__/process.cpython-310.pyc b/CLI/CLI_old/code/__pycache__/process.cpython-310.pyc deleted file mode 100644 index 46f437440408bf91bb39c6003a72a8fe46e94396..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3826 zcmbVPTaz0{74DuHjkJ>X;`OfaC81_hm_#_17APvo#yA-7E*O$cV!IL;ucMls?$wSn zni1Wjy$FdiwXTZ&kUu~OrG=+F^AF&K-!QK{`6u8i0^ey#-d$5XfTrej_vtfzIp;eU zrPI@8hToqq{d4~4s4Z)H+?8<>j7M#$ zbe-)94XnN;oC{18KDE?E z^xX^WV|IsSmMD&9zF=Qkhfw#&Q1~6@u*^PqhqHmrS#JhXrM`VE@k_`I?BCcVIqL24 zJyx6AxVv5}`jwVUqzW>5YgPO6xR*}jNMPLDk%5rPUp&bRqb$fG2>FXq62@I2n=06A z9<$G%WCz>ZQ5=mwn9XF=4AWGJD8cgDKci)f^wiWKGJy)8awz8M--wXzdc`%RQnljs zme%$|*`bX+(N(aA_rdDAH`Y6bD_+gX-O!lZuZ;Q4j@Zr%9f)W7)bj;uj$hPL?M9)@ z9UM>kw$h!P%deQ4#-`7f8aruQHXfwA2SL)flg5E++J;jCB>{qBX9KmOn9$ z8}TPh*aQBU!$8hs7|OVBq07}bg*~+U*3dbE^soTM^Uv64d>aleijPVI8+~pfTWR2k za^Dt}LoTYnx5RYcX)!VLM=oX$EyI~LoH=mLj5+gtN1PG!Eo;JQIfDXtXT`#ZH*ou| zYW535SDfv;w8{}eM!x{lFZNcwH7F;Lo|a(<$AVdvMEDEfiyG#QJ@`G`G7_oxi}&6I zXSXY3LPe^)QFyI;xecf8UE0aA&Pt=Px3^cv_-a^BRjZNc#&_$>3fY78s`W+#Gb>q? 
z#d716b_wh*UXs%vfwZ6{!5<@!Twq^7vwhGnoq+}YvOaFG%I+@=kRxCNduWkn2TtEn zm-~EZXXufh4>{fRS7$^4Qt0Ex?nckSDSoY9#vVBzn1iW#3~Fv(+>R0v#BuICNTVb# zMZh^BBzJ>OM<&96LR}=`LSD*(R+G|4UL2_?%Na*!oW^uj6?%EhqkFt?JiRd&XP-gTiqJ3=3=#5qBwLPvtiMAK?zBidc|5rge5 zN_REBVX}I4sM_pg1ZD>F1p~re3PSpIr zSz^48J~r3$_3!4qa{`2w+#%G#qk>jm0_OBylkIx^WBd#M z)pS=NqrV4Iwo2Twt0-*d`HWpHSI?k`;qJd2@SoFXRs~Yv&2h(4KgMULe3<-+{#lsv zCPt^Xkg*Ke^w1(dRYF0>2je1l(G)-SsLZ8UFL>tm?L$72 zb{UFZhl*aTVOmr27P=o|OYZ|yuLoW?@d6d@M7yaeClAIsZdYO^@Zty#MM=<0w;sqW z)!xtUyuV@6Ul@pBlsEtS=LA`>fasE{-UU;xo+wMbY)8`4_1^janCb)o@`|@nE1DuO zua1jSlUa=76e%h%#%ZesR47sz3Y<)#)SX6Y+oa-k^be#;pg>{PL18Wx`KqF_=Ec%y zdQ!HUy+cUob0C&gwqQfc2Y773Qmia5y92M8~QzJzoIbWo7Br_#t zeQgB7yXZqH#L6otc%-M;<) zZB?S#RU%X}kMc30OVKe;T-3nYAi8v|or-QOuX9M4*i~io0PN(Mv&%LBY?bCqQ`I;B E4Ymv$aR2}S diff --git a/CLI/CLI_old/code/cli.py b/CLI/CLI_old/code/cli.py deleted file mode 100644 index fe1cf8c..0000000 --- a/CLI/CLI_old/code/cli.py +++ /dev/null @@ -1,278 +0,0 @@ -# File : pipeline.py -# Time :2024/7/14 14:11 -# Author :ClarkWang -# Contact :wzyyyyyy0519@gmail -# Description: - -# !pip install --upgrade langchain langchain_groq -# !pip install --upgrade langchain-community -# !pip install --upgrade langchain-core -# !pip install --upgrade langsmith -# !pip install ctransformers[cuda] -# !pip install huggingface-hub -# !pip install --upgrade sqlalchemy -# !pip install rdflib -# !pip install llama-cpp-python -# !pip install typing-extensions==4.7.1 --upgrade -# !pip install pypdf2 -# !pip install sentence-transformers==2.7.0 -# !pip install --upgrade transformers -# !pip install faiss-cpu -# !pip install bitsandbytes accelerate - -import subprocess -from multiprocessing import Pool, Process -import time - - -background_process = Process(target=subprocess.Popen, args=(["bash", "run_grobid.sh"],)) -background_process.start() -print(background_process.is_alive()) -time.sleep(15) - -# client = GrobidClient(config_path="./setting/config.json") - - - -from grobid_client.grobid_client import 
GrobidClient -import os -import json -import argparse - -import torch -import transformers -from huggingface_hub import login -from langchain_groq import ChatGroq -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import FAISS -from langchain_community.embeddings import HuggingFaceEmbeddings -from langchain.prompts import PromptTemplate -from langchain.chains import RetrievalQA -from process import * - - - -# client = GrobidClient(config_path="./setting/config.json") - -def get_context(context): - res = [] - for item in context: - res.append(item.page_content) - return res - -def clean_gen(gen): - res = {} - for line in gen.split("\n"): - if ":" in line: - try: - s, e = line.split(":") - res[s.strip()] = e - except: - pass - return res - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -class SolarQA: - def __init__(self, use_platform, user_key, llm_id, hf_key, llm_platform="LOCAL", temperature=0.1, sim_model_id="Salesforce/SFR-Embedding-Mistral", input_file_path=str(), context_file_path=""): - # print(input_file_path) - self.use_platform = use_platform - self.llm_id = llm_id - self.user_key = user_key - self.hf_key = hf_key - self.llm_platform = llm_platform.lower() - self.temperature = temperature - self.sim_model_id = sim_model_id - self.sys_prompt = """ - You are an assistant for extract information from context and selection the possible answer from the selection provided. - You are given the extracted parts of a paper about solar chemistry and a question. Provide the extracted information and nothing else. 
- """ - self.context_file_path = context_file_path - # self.data = data - self.input_file_path = input_file_path - - self.context_result = { - "generation_model": self.llm_id, - "similarity_model": self.sim_model_id, - "similarity_metric": "Cosine_Similarity", - "result": [] - } - login(self.hf_key) - self.get_text() - self.get_vector() - print("¡¡¡Vector Store Database is prepared!!!") - self.get_llm() - - def get_text(self): - title_list = ["Abstract", "Experimental", "Results and discussion"] - # with open(self.json_path, "rb") as f: - # data = json.load(f) - # print(self.input_file_path) - if self.input_file_path[-3:] == "pdf": - data = process_paper(self.input_file_path) - else: - with open(self.input_file_path, "rb") as f: - data = json.load(f) - # print("¡¡¡PDF file has been extracted!!!") - self.context = "" - for section in data: - if section["title"] in title_list: - self.context += section["title"] - self.context += "\n" - self.context += section["content"] - self.context += "\n" - - def get_llm(self): - if self.use_platform: - if self.llm_platform == "groq": - os.environ["GROQ_API_KEY"] = self.user_key - self.llm = ChatGroq(temperature=self.temperature, model_name=self.llm_id) - else: - raise ValueError('Unsupportted Platform') - else: - try: - bnb_config = transformers.BitsAndBytesConfig( - load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16 - ) - self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_id) - self.llm = transformers.AutoModelForCausalLM.from_pretrained( - self.llm_id, - torch_dtype=torch.bfloat16, - device_map="auto", - quantization_config=bnb_config - ) - self.terminators = [ - self.tokenizer.eos_token_id, - self.tokenizer.convert_tokens_to_ids("<|eot_id|>") - ] - except: - raise ValueError('Unsupportted Platform') - - def get_vector(self): - model_kwargs = {"device": "cpu"} - self.sim_model = HuggingFaceEmbeddings(model_name=self.sim_model_id, 
model_kwargs=model_kwargs) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=512, - chunk_overlap=32, - length_function=len - ) - chunks = text_splitter.split_text(text=self.context) - self.vector_store = FAISS.from_texts(chunks, embedding=self.sim_model, normalize_L2=True, distance_strategy="COSINE") - - def search(self, query, k): - embed_q = self.sim_model.embed_query(query) - self.context = self.vector_store.similarity_search_with_score_by_vector(embed_q, k) - - def format_prompt(self, query, k): - self.search(query, k) - prompt = self.sys_prompt + "\n" + "Question:" - prompt += query - prompt += "\n" - prompt += "Context:" - for i in range(k): - prompt += f"{self.context[i]}\n" - return prompt - - def generation(self, query_data): - res = "" - if self.use_platform: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - outputs = self.llm.invoke(messages) - response = outputs.content - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - else: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - input_ids = self.tokenizer.apply_chat_template( - messages, - add_generation_prompt=True, - return_tensors="pt" - ) - outputs = self.llm.generate( - input_ids, - max_new_tokens=1024, - eos_token_id=self.terminators, - do_sample=True, - temperature=self.temperature, - top_p=0.9, - ) - response = 
self.tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True) - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - - def save_context(self): - # print(self.context_result) - with open(self.context_file_path, "w") as f: - json.dump(self.context_result, f) - print(f"RAG context is saved at: {self.context_file_path}") - - - -def get_parser(): - parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") - parser.add_argument('--use_platform', type=lambda x:str2bool(x), default=True, help="the parameter of whether use online llm platform or use local model") - parser.add_argument('--user_key', default="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", help="if use platform, enter your key for platform", type=str) - parser.add_argument('--llm_id', default="llama-3.1-70b-versatile", help="the reference for the selected model, support grog model, huggingface llm or local model path ", type=str) - parser.add_argument('--hf_key', default="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", help="your huggingface token", type=str) - parser.add_argument('--llm_platform', default="groq", help='your platform choice', choices=["groq"], type=str) - parser.add_argument('--sim_model_id', default='Salesforce/SFR-Embedding-Mistral', help="encoder model for RAG", type=str) - parser.add_argument('--input_file_path', help='input data, extracted context from pdf', type=str) - parser.add_argument('--prompt_file_pdf', help='queries', type=str) - parser.add_argument('--context_file_path', help='save context file', type=str) - return parser - -def main(): - parser = get_parser() - 
args = parser.parse_args() - args_dict = vars(args) - prompt_file_pdf = args_dict["prompt_file_pdf"] - del args_dict["prompt_file_pdf"] - start_time = time.time() - solar = SolarQA(**args_dict) - print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) - temp_time = time.time() - with open(prompt_file_pdf, "rb") as f: - query_data = json.load(f) - solar.generation(query_data=query_data) - print(solar.result) - print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) - solar.save_context() - - -print("start") -main_process = Process(target=main) -main_process.start() -if main_process.is_alive(): - print("main process is finished") -else: - background_process.kill() diff --git a/CLI/CLI_old/code/data/context.json b/CLI/CLI_old/code/data/context.json deleted file mode 100644 index 8ed839d..0000000 --- a/CLI/CLI_old/code/data/context.json +++ /dev/null @@ -1,154 +0,0 @@ -{ - "generation_model": "meta-llama/Llama-3.2-3B-Instruct", - "similarity_model": "Salesforce/SFR-Embedding-Mistral", - "similarity_metric": "Cosine_Similarity", - "result": [ - { - "question_category": "catalyst/co_catalyst", - "query": "\nPlease find the name of the catalyst and co-catalyst name from the provided context which describe an solar chemical experiment.\nPlease only generate the name of the catalyst and nothing else. Such as TiO2. ZrO2 or Ag, etc.\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nIf there are multiple results, please indicate them as XXX-XXX.\ncatalyst: XXX\nco_catalyst: XXX\n", - "generation": { - "catalyst": " TiO2", - "co_catalyst": " Cu" - }, - "evidence": [ - { - "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", - "similarity_score": 0.4707722067832947 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.49091827869415283 - }, - { - "pdf_reference": "2 catalyst.The highest methanol yield as well as quantum efficiency was achieved by 3CuO/TiO 2 indicating that this catalyst had the highest reactivity among all the prepared catalysts.Besides describing the catalyst reactivity, quantum efficiency value also figured the effectiveness of the reaction system.The effect of copper loading on methanol yields is shown in Fig. 4. 
The methanol yields increased with Cu loading, but then decreased when the Cu loading exceeded 3 wt.Evidently, more Cu loading can", - "similarity_score": 0.5129979848861694 - }, - { - "pdf_reference": "for Degussa P25 and 3CuO/TiO 2 catalysts.A mean value for E a of ca.26 and 12 kJ/mol was calculated for Degussa P25 and 3CuO/TiO 2 , respectively.These positive values of E a imply that the desorption of products is the rate limiting step in the photosynthetic formation of methanol.In addition, the apparent lower activation energy of 3CuO/TiO 2 catalyst may indicate a catalytic role of copper species as an active site to provide methanol, and hence improving the photoefficiency of TiO 2 photocatalyst.The", - "similarity_score": 0.5173196792602539 - }, - { - "pdf_reference": "Photocatalytic reduction of CO 2 by copper-doped titania catalysts has been investigated.The photocatalysts with various copper species (Cu 0 , Cu I , Cu II ) were prepared by an improved-impregnation method, where copper nitrate is doped into TiO 2 Degussa-P25.It is likely that copper present on the catalyst surface and the grain size of copper-titania catalysts is uniform, with crystallite size approximately 23 nm.The dispersion capacity of CuO in the vacant sites of TiO 2 is about 4.16 Cu 2 nm A2 (2.2", - "similarity_score": 0.5403363704681396 - } - ] - }, - { - "question_category": "light_source/lamp", - "query": "\nPlease find the category of Light_source and Lamp from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Light_source is Solar or Solar Simulator, Lamp is always Solar Simulator.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']\nPlease generating restrictively follow 
the format, and must start the generation as the format. Do not generate anything else.\nLight_source: XXX\nLamp: XXX\n", - "generation": { - "Light_source": " Solar Simulator", - "Lamp": " Solar Simulator" - }, - "evidence": [ - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. Blank experiments were conducted to ensure that the product", - "similarity_score": 0.47584426403045654 - }, - { - "pdf_reference": "ranging from 0.4 to 1.0 lm.The EDX/Mapping analysis of Cu on the catalysts demonstrated that Cu uniformly dispersed on TiO 2 surface, and further increase of copper leads to the increase of the dot intensities of Cu.In addition, Table 1 lists the elemental wt of Cu estimated from EDX and AAS for comparison.The difference between wt of Cu obtained by EDX on different voltage (15 and 22 kV) and AAS indicated that most Cu was on the surface of the TiO 2 . 
The DRS patterns of TiO 2 P25 and all CuO-loaded", - "similarity_score": 0.6357707977294922 - }, - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.63763827085495 - }, - { - "pdf_reference": "was used to record the diffuse reflectance spectra (DRS) of the samples.The reflectance spectra of the samples were analyzed under ambient condition in the wavelength range of 340-700 nm.Copper concentration in the catalysts was measured by atomic absorption spectroscopy (AAS) using Chem.Tech Analytical 2000 spectrophotometer.The specific surface area of catalysts was measured by multipoint BET of N 2 adsorption in a Quantachrome Autosorb-6.A SEM 515 PHILIPS scanning electron microscope (SEM) equipped with", - "similarity_score": 0.6471814513206482 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. 
The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.6493161916732788 - } - ] - }, - { - "question_category": "reaction_medium", - "query": "\nPlease find the category of Reaction_medium from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Ph Value is mentioned in the experiment, Reaction_medium is always Liquid.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReaction_medium: ['Liquid', 'Gas']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReaction_medium: XXX\n", - "generation": { - "Reaction_medium": " Liquid" - }, - "evidence": [ - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. 
Blank experiments were conducted to ensure that the product", - "similarity_score": 0.6080079674720764 - }, - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.6227010488510132 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.6527293920516968 - }, - { - "pdf_reference": "agitated the catalyst-suspended solution at the bottom to prevent sedimentation of the catalyst.The liquid sample was withdrawn every hour from a reactor column.The catalyst-suspended samples were centrifuged to separate catalyst particles from the liquid samples and then analyzed by a GC-FID of Shimadzu 8-ATC equipped with Porapak-Q column.", - "similarity_score": 0.6608579754829407 - }, - { - "pdf_reference": "the system.As a result of these sequential reactions (Eq.( 5)), the electron-hole recombination rate could be effectively reduced. 
To study the temperature dependence of the CO 2 photoreduction, the experiment was repeated at different temperatures in the range of 43-100 C.As can be seen from Fig. 5 the methanol yield increased for higher reaction temperatures.Saladin et al. 10 reported that the kinetic of CO 2 photoreduction can be modeled with the adsorption/desorption equilibrium of the various species", - "similarity_score": 0.6727200150489807 - } - ] - }, - { - "question_category": "reactor_type", - "query": "\nPlease find the category of Reactor_type from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReactor_type: ['Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReactor_type: XXX\n", - "generation": { - "Reactor_type": " Fixed-bed" - }, - "evidence": [ - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. 
Blank experiments were conducted to ensure that the product", - "similarity_score": 0.5136107802391052 - }, - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.6327827572822571 - }, - { - "pdf_reference": "microscope (SEM) equipped with EDAX PV9900 PHILIPS energy dispersive X-ray (EDX) and Mapping was conducted at 15 and 22 kV to observe the morphology of catalysts, to perform semi-quantitative analysis, and to analyze distribution of Cu and Ti, respectively. The photocatalytic reactor configuration consists of a horizontal stainless steel vessel with an inner diameter and height of the vessel 140 and 50 mm, respectively.A top plate cover of the vessel was made of Pyrex and equipped with a gas bubbler and", - "similarity_score": 0.6621394157409668 - }, - { - "pdf_reference": "the system.As a result of these sequential reactions (Eq.( 5)), the electron-hole recombination rate could be effectively reduced. To study the temperature dependence of the CO 2 photoreduction, the experiment was repeated at different temperatures in the range of 43-100 C.As can be seen from Fig. 5 the methanol yield increased for higher reaction temperatures.Saladin et al. 
10 reported that the kinetic of CO 2 photoreduction can be modeled with the adsorption/desorption equilibrium of the various species", - "similarity_score": 0.6621772050857544 - }, - { - "pdf_reference": "agitated the catalyst-suspended solution at the bottom to prevent sedimentation of the catalyst.The liquid sample was withdrawn every hour from a reactor column.The catalyst-suspended samples were centrifuged to separate catalyst particles from the liquid samples and then analyzed by a GC-FID of Shimadzu 8-ATC equipped with Porapak-Q column.", - "similarity_score": 0.667502760887146 - } - ] - }, - { - "question_category": "operation_mode", - "query": "\nPlease find the category of Operation_mode from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nOperation_mode: ['Batch', 'Continuous', 'Batch/Continuous']\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nOperation_mode: XXX\n", - "generation": { - "Operation_mode": " Batch/Continuous" - }, - "evidence": [ - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.6429235935211182 - }, - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. Blank experiments were conducted to ensure that the product", - "similarity_score": 0.6567009687423706 - }, - { - "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", - "similarity_score": 0.6839278340339661 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. 
The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.6891406774520874 - }, - { - "pdf_reference": "was used to record the diffuse reflectance spectra (DRS) of the samples.The reflectance spectra of the samples were analyzed under ambient condition in the wavelength range of 340-700 nm.Copper concentration in the catalysts was measured by atomic absorption spectroscopy (AAS) using Chem.Tech Analytical 2000 spectrophotometer.The specific surface area of catalysts was measured by multipoint BET of N 2 adsorption in a Quantachrome Autosorb-6.A SEM 515 PHILIPS scanning electron microscope (SEM) equipped with", - "similarity_score": 0.6904171109199524 - } - ] - } - ] -} \ No newline at end of file diff --git a/CLI/CLI_old/code/data/paper1_extraction.json b/CLI/CLI_old/code/data/paper1_extraction.json deleted file mode 100644 index ccaf761..0000000 --- a/CLI/CLI_old/code/data/paper1_extraction.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "title": "Abstract", - "content": "Pure TiO 2 and various silver-enriched TiO 2 powders were prepared by the sol-gel process controlled in the reverse micellar environment.The catalysts were tested in CO 2 photocatalytic reduction and characterized by X-ray diffraction (XRD), nitrogen adsorption measurement and UV-vis.Methane and methanol were the main reduction products.The yield of methane and methanol increases when modifying the TiO 2 by silver incorporation is caused by two mechanisms: up to 5% of Ag in TiO 2 the Ag impurity band inside the TiO 2 bandgap decreases the absorption edge and increases so the electron-hole pair generation, above 5% of Ag in TiO 2 Ag metallic clusters are formed in TiO 2 crystals with Shottky barrier at the metal-semiconductor interface, which spatially separates electron and holes and increases their lifetime (decreases probability of their 
recombination)." - }, - { - "title": "Experimental", - "content": "Pure TiO 2 and various silver-enriched TiO 2 powders were prepared by the sol-gel process controlled in the reverse micellar environment.Pure TiO 2 was synthesized by the addition of Tita- The surface area and porous structure of prepared catalysts were evaluated from nitrogen adsorption isotherms by the BET and BJH methods, respectively.Nitrogen physical adsorption was performed on a volumetric apparatus ASAP2020 Micromeritics (USA).Before analyses the samples were dried at 105 \u2022 C for 24 h in vacuum (0.1 Pa). UV-vis diffuse reflectance spectra of samples were recorded using GBS CINTRA 303 spectrometer equipped with a diffuse reflectance attachment with a spectralon-coated integrating sphere against spectralon reference.The reflectances were recalculated to the absorption using the Schuster-Kubelka-Munk equation, where R \u221e is the diffuse reflectance from a semi-infinite layer.Absorption edge was estimated by extrapolating the decreasing portion of the spectrum to the abscissa at zero absorption.Ag-to-TiO 2 mass ratio in Ag/TiO 2 powders were determined by X-ray fluorescence (XRF) analysis using an ARL 9400 XP sequential WD-XRF spectrometer with the standard deviation lower than 5%. The purity (carbon content in weight %) of the powders was specified on an Elementar Vario EL III (Elementar).The detection limit of the apparatus was 0.1 wt.%. XRD powder diffraction patterns were obtained with the aid of a Seifert-FMP or a Panalytical-MPD laboratory diffractometer with Cu K\u2423 radiation in the conventional focusing Bragg-Brentano geometry in the diffraction angle range 2 = 10-90 \u2022 . The photocatalytic reduction of carbon dioxide was carried out in a homemade apparatus (Fig. 1). 
The photocatalytic reduction of carbon dioxide was carried out in a stirred batch annular reactor with a suspended catalyst illuminated by UV 8 W Hg lamp (254 nm).GC/FID/TCD was used for the analysis of gas and liquid reaction products.The details of the photocatalytic CO 2 reduction experiment and analytical methods were described in our previous publication [39].It is important to minimize the influence of transport phenomena during kinetic measurements.The elimination of CO 2 diffusion from the bulk of gas through the gas-liquid interface in a laboratory batch slurry reactor was accomplished by saturating the liquid with pure CO 2 before the reaction had been started [4,11].Catalyst loading of 1 g dm -3 was chosen to avoid concentration gradients in the bulk of stirred liquid with TiO 2 suspension due to the scattering effect of light caused by the high TiO 2 concentration [11,16,40,41].The determination of suitable volume of the liquid phase in our annular photoreactor to fulfil the requirement of perfect mixing has been published recently as 100 ml [42]. The accuracy of measurements was verified by series of repeated measurements.Average values were mentioned in this article.Maximal relative error was 5%." - }, - { - "title": "Results and discussion", - "content": "The basic textural properties of prepared titania and doped titania samples are summarised in Table 1 together with real Ag content determined by XRF.All samples possess a relatively high surface area; moreover, the positive effect of silver, which increases the surface area by about 15%, is clearly noticeable.It is obvious that the surface areas of Ag-doped TiO 2 samples varied only slightly and they are independent on the growth of Ag amount.The pore maxima of all samples are nearly the same without dependence on the Ag presence. The purity of all prepared samples was excellent, because the organic residual content was lower than 0.1 wt.%. 
XRD analysis confirmed the presence of the pure anatase crystallite structure for all samples.Ag was not detected in XRD pattern, probably Ag is situated in the bulk (inside the TiO 2 crystals) and/or Ag clusters are smaller than 0.3 nm.Fig. 2 shows the UV-vis spectra of Ag-TiO 2 catalysts.It is seen that (i) the maxima of the UV-vis band increased with the increasing Ag loading from 360 to 367 nm and (ii) the maximum absorption wavelength increased with increasing Ag loading, i.e. the absorption was shifted into the visible region for Ag-TiO 2 catalysts and the shift increased with the increasing amount of silver.The absorption edge (Fig. 2) values are given in Table 1. The effect of irradiation time on the formation of CO 2 photocatalytic reduction products was investigated over a period of 0-24 h on TiO 2 catalysts with different amount of Ag.Fig. 3 shows the evolution of all reaction products as the functions of the irradiation for a 7% Ag-TiO 2 catalyst.Two main products were determined: methane in the gas phase and methanol in the liquid phase.Hydrogen and low amounts of carbon monoxide were also detected.Other products such as formic acid, formaldehyde, ethane and ethylene could also be formed [2,16], but they were undetectable.The observed order of yields (mol/g cat ) order was: We assume on the basic of observed products the mechanism where the formation of carbon monoxide from \u2022 CO 2 -radical is the first step.Then both methane and methanol are formed in the parallel reactions.This mechanism was first suggested by Anpo [3]. Several authors [2,5,17] proposed the same mechanism with the exception that they predicted the formation of a carbon radical instead of surficial carbon. Comparisons of CH 4 , CH 3 OH, H 2 and CO yields over TiO 2 with different Ag content are shown in Figs.4-6A and B respectively.The yields of methane were negligible during the first 8 h of irradiation in all cases with the exception of catalyst 1 and 5% Ag-TiO 2 (Fig. 
4).A similar trend of yield-time dependency was observed; only the shape for the sample with 7% Ag-TiO 2 was slightly different.A substantial increase of methane yield was observed after 8 h of irradiation.The highest yield of methane was observed for the 7% Ag-TiO 2 catalyst.The yields of methanol were one order of magnitude lower than the yields of methane (Fig. 5).Data at 5 h were measured but were under the limit of determinableness (12 g/l i.e. 0.38 mol/gcatalyst).The steepest curve of methanol yield was measured for the 7% Ag-TiO 2 catalyst. The hydrogen yields were small during the first 13 h of irradiation in all cases (Fig. 6A).After 15 h the hydrogen yields started increasing slowly.The yields of carbon monoxide increased slowly during the reaction but all yields were near the limit of detection (Fig. 6B). The data from the longest reaction time span were chosen for the comparison because the yields of all products were the highest and the most accurate.The effect of silver doping on the yields of the two main products (methane and methanol) related to the weight of the catalyst is depicted in Fig. 7.The photoreactivity of Ag-TiO 2 increases with the increase in the Ag dopant concentration.Both yields are the highest for 7% Ag-TiO 2 .The effect of the Ag dopant on the CO 2 photocatalytic reduction has not been studied yet.However, an optimum amount of Ag corresponding to the highest product yields was found for other photocatalytic reactions [27,28,34,36]. The yields of the two main products (methane and methanol) expressed per 1 m 2 of catalyst surface (Fig. 8) show similar trends with the highest yields obtained for 7% Ag-TiO 2 .This indicates that the specific surface area is not the most decisive parameter in the CO 2 photocatalytic reduction. 
Doping with metals or metal oxides could cause two effects: (1) Firstly, it creates impurity band inside the TiO 2 bandgap, thus shifting the absorption edge to the visible region [29].The decrease of the absorption edge and the shift of absorption wavelength into the visible region in this paper are clearly visible from Table 1 and Fig. 2, for Ag content up to 5%.The decrease of the absorption edge increases the electron and hole pair photogeneration, which causes yields increase of CO 2 photoreduction.(2) Secondly, for Ag content above 5% silver atoms are not anymore randomly located in TiO 2 crystal, but forms probably metallic clusters inside TiO 2 crystal.These metal clusters could provoke a decrease in the electron-hole recombination rate [43,44].This hypothesis was based on the fact that the Fermi level of TiO 2 is higher than that of silver metals [45].Such phenomenon would be expected to lead to the formation of a Schottky barrier at metal-semiconductor contact region, which causes the charge separation, the decrease of electron-hole recombination rate and consequently increase of the TiO 2 photocatalytic activity.The above mentioned fact could be the reason for observed yields increases with increased Ag content in our experiments with CO 2 photocatalytic reduction and is the main reason of yields increase with 7% Ag-TiO 2 since the absorption edge has same value as 5% Ag-TiO 2 . Our results are in agreement with other works dealing with photocatalytic activity of Ag-doped TiO 2 .For example, the presence of Ag in TiO 2 increased the yield of dye degradation [27,28,31,32], photooxidation of 2-propanol [29], degradation Fig. 8. Dependence of the product yields related to specific surface area (after 24 h of irradiation) on the different catalysts. of 1,4-dichlorobenzene [33] and degradation of oxalic acid [37]." 
- } -] \ No newline at end of file diff --git a/CLI/CLI_old/code/data/prompts.json b/CLI/CLI_old/code/data/prompts.json deleted file mode 100644 index b6dc5eb..0000000 --- a/CLI/CLI_old/code/data/prompts.json +++ /dev/null @@ -1 +0,0 @@ -{"catalyst/co_catalyst": "\nPlease find the name of the catalyst and co-catalyst name from the provided context which describe an solar chemical experiment.\nPlease only generate the name of the catalyst and nothing else. Such as TiO2. ZrO2 or Ag, etc.\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nIf there are multiple results, please indicate them as XXX-XXX.\ncatalyst: XXX\nco_catalyst: XXX\n", "light_source/lamp": "\nPlease find the category of Light_source and Lamp from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Light_source is Solar or Solar Simulator, Lamp is always Solar Simulator.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nLight_source: XXX\nLamp: XXX\n", "reaction_medium": "\nPlease find the category of Reaction_medium from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Ph Value is mentioned in the experiment, Reaction_medium is always Liquid.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReaction_medium: ['Liquid', 'Gas']\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nReaction_medium: XXX\n", "reactor_type": "\nPlease find the category of Reactor_type from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReactor_type: ['Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReactor_type: XXX\n", "operation_mode": "\nPlease find the category of Operation_mode from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nOperation_mode: ['Batch', 'Continuous', 'Batch/Continuous']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nOperation_mode: XXX\n"} \ No newline at end of file diff --git a/CLI/CLI_old/code/process.py b/CLI/CLI_old/code/process.py deleted file mode 100644 index 36e3655..0000000 --- a/CLI/CLI_old/code/process.py +++ /dev/null @@ -1,184 +0,0 @@ -from grobid_client.grobid_client import GrobidClient -import subprocess -from multiprocessing import Pool -import xml.etree.ElementTree as ET -import json -import pandas as pd -import re -import unicodedata -import logging -import threading -# from cli import * -# from cli import * - -# Configurar el registro (logging) -logging.basicConfig(level=logging.ERROR, format='%(asctime)s:%(levelname)s:%(message)s') - -# GROBID client configuration -client = GrobidClient(config_path="./setting/config.json") -service_name = "processFulltextDocument" - -def process_fulltext_document(service, file): - try: - rsp = client.process_pdf(service, file, - generateIDs=True, - consolidate_header=True, - consolidate_citations=False, - include_raw_citations=False, - include_raw_affiliations=False, - tei_coordinates=True, - 
segment_sentences=True) - if rsp[1] == 200: # HTTP status check - return rsp[2] # rsp[2] is the XML string - else: - print(f"Error: {rsp[1]}") - except Exception as e: - print(f"Exception during PDF processing: {e}") - return None - -def clean_text(text): - """ Limpia y normaliza el texto para eliminar caracteres no deseados. """ - text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii') # Normalización Unicode - text = re.sub(r'[^\x00-\x7F]+', ' ', text) # Eliminar todos los caracteres no ASCII - text = re.sub(r'[\u00b0\n\t\r]', ' ', text) # Eliminar caracteres específicos - text = re.sub(r'[^A-Za-z0-9\s,.?!;:()\-\'\"/]', '', text) # Mantener solo caracteres alfanuméricos y puntuación básica - text = re.sub(r'\s+', ' ', text).strip() # Reemplazar múltiples espacios por uno solo - return text - -def extract_sections_from_xml(xml_content): - """ Extrae secciones relevantes del contenido XML devuelto por Grobid. """ - root = ET.fromstring(xml_content) - ns = {'tei': 'http://www.tei-c.org/ns/1.0'} - - # Extraer título - title = '' - title_element = root.find(".//tei:title", ns) - if title_element is not None: - title = ''.join(title_element.itertext()).strip() - - # Función para extraer contenido bajo una etiqueta específica - def extract_content_by_tag(tag_name): - content = [] - path = f".//{{{ns['tei']}}}{tag_name}" - for elem in root.findall(path): - text = ''.join(elem.itertext()) - if text: - content.append(clean_text(text.strip())) - return " ".join(content) - - # Función para extraer contenido según palabras clave - def extract_content_by_keywords(start_keywords, end_section): - content = [] - capture = False - for elem in root.iter(): - if elem.tag.endswith("head"): - if elem.text and any(keyword.lower() in elem.text.lower() for keyword in start_keywords): - capture = True - elif elem.text and end_section.lower() in elem.text.lower(): - break - if capture and elem.tag.endswith("p"): - text = ''.join(elem.itertext()).strip() - if 
text: - content.append(clean_text(text)) - return " ".join(content) - - # Extracción de secciones - sections = [ - {"title": "Abstract", "content": extract_content_by_tag("abstract")}, - {"title": "Experimental", "content": extract_content_by_keywords( - ["Experimental", "Experimental studies", "Experiments", "Experimental methods", "Methods"], "Results and discussion")}, - {"title": "Results and discussion", "content": extract_content_by_keywords( - ["Results and discussion", "Result and discussion", "Results"], "Conclusion")}, - {"title": "Conclusions", "content": extract_content_by_keywords( - ["Conclusion", "Conclusions"], "Conclusion")}, - ] - - # Sección opcional "Supporting Information" - supporting_content = extract_content_by_keywords( - ["Supporting Information", "Supporting"], "Conclusion") - if supporting_content: - sections.append({"title": "Supporting Information", "content": supporting_content}) - - return sections - -def run_process(cmd): - subprocess.run(command, capture_output=True, shell=True) - -def process_paper(pdf_file_path): - """ Procesa un archivo PDF y devuelve una lista con objetos JSON por cada sección. 
""" - try: - # Procesar el PDF con Grobid para obtener el XML - xml_response = process_fulltext_document(service_name, pdf_file_path) - - if xml_response: - # Extraer secciones relevantes del XML y devolverlas como lista de objetos - sections = extract_sections_from_xml(xml_response) - return sections # Devolver el resultado como una lista de objetos JSON - else: - return [{"error": "Error processing the PDF."}] - except Exception as e: - logging.error(f"Error processing paper: {e}") - return [{"error": str(e)}] - - -# def get_parser(): -# parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") -# parser.add_argument('--use_platform', type=lambda x:str2bool(x), default=True, help="the parameter of whether use online llm platform or use local model") -# parser.add_argument('--user_key', default="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", help="if use platform, enter your key for platform", type=str) -# parser.add_argument('--llm_id', default="llama-3.1-70b-versatile", help="the reference for the selected model, support grog model, huggingface llm or local model path ", type=str) -# parser.add_argument('--hf_key', default="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", help="your huggingface token", type=str) -# parser.add_argument('--llm_platform', default="groq", help='your platform choice', choices=["groq"], type=str) -# parser.add_argument('--sim_model_id', default='Salesforce/SFR-Embedding-Mistral', help="encoder model for RAG", type=str) -# parser.add_argument('--pdf_file_path', help='input data, extracted context from pdf', type=str) -# parser.add_argument('--prompt_file_pdf', help='queries', type=str) -# parser.add_argument('--context_file_path', help='save context file', type=str) -# parser.add_argument('--grobid_path', help='the directoray of your grobid location', type=str) -# return parser - - -# def main(): -# parser = get_parser() -# args = parser.parse_args() -# args_dict = vars(args) -# print(args_dict) -# prompt_file_pdf = 
args_dict["prompt_file_pdf"] -# grobid_path = args_dict["grobid_path"] -# global command -# command = f"cd ~; cd {grobid_path}; ./gradlew run" -# # DETACHED_PROCESS = 0x00000008 -# # subprocess.Popen(["bash", "run_grobid.sh"], creationflags=DETACHED_PROCESS) -# # subprocess.Popen(["bash", "run_grobid.sh"], close_fds=True) -# # subprocess.Popen(["bash", "run_grobid.sh"]) -# print("Grobid is running") -# global client -# client = GrobidClient(config_path="./setting/config.json") -# global service_name -# service_name = "processFulltextDocument" -# del args_dict["prompt_file_pdf"] -# del args_dict["grobid_path"] -# start_time = time.time() -# solar = SolarQA(**args_dict) -# print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) -# temp_time = time.time() -# with open(prompt_file_pdf, "rb") as f: -# query_data = json.load(f) -# solar.generation(query_data=query_data) -# print(solar.result) -# print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) -# solar.save_context() - -# pool = Pool(2) -# pool.apply_async(subprocess.Popen(["bash", "run_grobid.sh"]), (0,)) -# pool.apply_async(main(), (0,)) -# # main() - -# # Configurar el registro (logging) -# logging.basicConfig(level=logging.ERROR, format='%(asctime)s:%(levelname)s:%(message)s') - -# # GROBID client configuration -# client = GrobidClient(config_path="./setting/config.json") -# service_name = "processFulltextDocument" - - - -# print(type(process_paper("/home/jovyan/grob/test.pdf"))) \ No newline at end of file diff --git a/CLI/CLI_old/code/run_benchmark.py b/CLI/CLI_old/code/run_benchmark.py deleted file mode 100644 index 375a6c8..0000000 --- a/CLI/CLI_old/code/run_benchmark.py +++ /dev/null @@ -1,247 +0,0 @@ -import os -import time -from grobid_client.grobid_client import GrobidClient -import json -import argparse - -import torch -import transformers -from huggingface_hub import login -from langchain_groq import ChatGroq 
-from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import FAISS -from langchain_community.embeddings import HuggingFaceEmbeddings -from langchain.prompts import PromptTemplate -from langchain.chains import RetrievalQA -from process import * - - -def get_context(context): - res = [] - for item in context: - res.append(item.page_content) - return res - -def clean_gen(gen): - res = {} - for line in gen.split("\n"): - if ":" in line: - try: - s, e = line.split(":") - res[s.strip()] = e - except: - pass - return res - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -class SolarQA: - def __init__(self, use_platform, user_key, llm_id, hf_key, llm_platform="LOCAL", temperature=0.1, sim_model_id="Salesforce/SFR-Embedding-Mistral", input_file_path=str(), context_file_path=""): - # print(input_file_path) - self.use_platform = use_platform - self.llm_id = llm_id - self.user_key = user_key - self.hf_key = hf_key - self.llm_platform = llm_platform.lower() - self.temperature = temperature - self.sim_model_id = sim_model_id - self.sys_prompt = """ - You are an assistant for extract information from context and selection the possible answer from the selection provided. - You are given the extracted parts of a paper about solar chemistry and a question. Provide the extracted information and nothing else. 
- """ - self.context_file_path = context_file_path - # self.data = data - self.input_file_path = input_file_path - - self.context_result = { - "generation_model": self.llm_id, - "similarity_model": self.sim_model_id, - "similarity_metric": "Cosine_Similarity", - "result": [] - } - login(self.hf_key) - self.get_text() - self.get_vector() - print("¡¡¡Vector Store Database is prepared!!!") - self.get_llm() - - def get_text(self): - title_list = ["Abstract", "Experimental", "Results and discussion"] - # with open(self.json_path, "rb") as f: - # data = json.load(f) - # print(self.input_file_path) - if self.input_file_path[-3:] == "pdf": - data = process_paper(self.input_file_path) - else: - with open(self.input_file_path, "rb") as f: - data = json.load(f) - # print("¡¡¡PDF file has been extracted!!!") - self.context = "" - for section in data: - if section["title"] in title_list: - self.context += section["title"] - self.context += "\n" - self.context += section["content"] - self.context += "\n" - - def get_llm(self): - if self.use_platform: - if self.llm_platform == "groq": - os.environ["GROQ_API_KEY"] = self.user_key - self.llm = ChatGroq(temperature=self.temperature, model_name=self.llm_id) - else: - raise ValueError('Unsupportted Platform') - else: - try: - bnb_config = transformers.BitsAndBytesConfig( - load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16 - ) - self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_id) - self.llm = transformers.AutoModelForCausalLM.from_pretrained( - self.llm_id, - torch_dtype=torch.bfloat16, - device_map="auto", - quantization_config=bnb_config - ) - self.terminators = [ - self.tokenizer.eos_token_id, - self.tokenizer.convert_tokens_to_ids("<|eot_id|>") - ] - except: - raise ValueError('Unsupportted Platform') - - def get_vector(self): - model_kwargs = {"device": "cpu"} - self.sim_model = HuggingFaceEmbeddings(model_name=self.sim_model_id, 
model_kwargs=model_kwargs) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=512, - chunk_overlap=32, - length_function=len - ) - chunks = text_splitter.split_text(text=self.context) - self.vector_store = FAISS.from_texts(chunks, embedding=self.sim_model, normalize_L2=True, distance_strategy="COSINE") - - def search(self, query, k): - embed_q = self.sim_model.embed_query(query) - self.context = self.vector_store.similarity_search_with_score_by_vector(embed_q, k) - - def format_prompt(self, query, k): - self.search(query, k) - prompt = self.sys_prompt + "\n" + "Question:" - prompt += query - prompt += "\n" - prompt += "Context:" - for i in range(k): - prompt += f"{self.context[i]}\n" - return prompt - - def generation(self, query_data): - res = "" - if self.use_platform: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - outputs = self.llm.invoke(messages) - response = outputs.content - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - else: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - input_ids = self.tokenizer.apply_chat_template( - messages, - add_generation_prompt=True, - return_tensors="pt" - ) - outputs = self.llm.generate( - input_ids, - max_new_tokens=1024, - eos_token_id=self.terminators, - do_sample=True, - temperature=self.temperature, - top_p=0.9, - ) - response = 
self.tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True) - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - - def save_context(self): - # print(self.context_result) - with open(self.context_file_path, "w") as f: - json.dump(self.context_result, f) - print(f"RAG context is saved at: {self.context_file_path}") - - -def run_bench(input_folder, output_folder): - dir_list = os.listdir(input_folder) - for i in range(len(dir_list)): - args_dict = { - "use_platform": str2bool("False"), - "user_key": "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", - "llm_id": "meta-llama/Llama-3.2-3B-Instruct", - "hf_key": "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", - "llm_platform": "groq", - "sim_model_id": "Salesforce/SFR-Embedding-Mistral", - "input_file_path": "", - "prompt_file_pdf": "/home/jovyan/Solar/CLI/data/prompts.json", - "context_file_path": "" - } - input_dir = input_folder + f"paper_{i+1}.json" - output_dir = output_folder + f"result_{i+1}.json" - # result_list = os.listdir(output_folder) - if os.path.exists(input_dir): - if os.path.exists(output_dir): - pass - else: - args_dict["input_file_path"] = input_dir - args_dict["context_file_path"] = output_dir - print(args_dict) - prompt_file_pdf = args_dict["prompt_file_pdf"] - del args_dict["prompt_file_pdf"] - start_time = time.time() - solar = SolarQA(**args_dict) - print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) - temp_time = time.time() - with open(prompt_file_pdf, "rb") as f: - query_data = json.load(f) - solar.generation(query_data=query_data) 
- print(solar.result) - print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) - solar.save_context() - - - - -input_folder = "/home/jovyan/Solar/data/paper_all/" -output_folder = "/home/jovyan/Solar/CLI/context/" -run_bench(input_folder, output_folder) - - - \ No newline at end of file diff --git a/CLI/CLI_old/code/run_grobid.sh b/CLI/CLI_old/code/run_grobid.sh deleted file mode 100644 index 5e329cc..0000000 --- a/CLI/CLI_old/code/run_grobid.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# # download GROBID if directory does not exist -declare -r GROBID_VERSION="0.8.0" # or change to current stable version 0.6.2 - -# if [ ! -d grobid-${GROBID_VERSION} ]; then -# wget https://github.com/kermitt2/grobid/archive/${GROBID_VERSION}.zip -# unzip "${GROBID_VERSION}.zip" -# rm "${GROBID_VERSION}.zip" -# fi - - -# run GROBID -cd ~ -cd /home/jovyan/Solar/CLI/grobid-0.8.0 -./gradlew run \ No newline at end of file diff --git a/CLI/CLI_old/code/setting/config.json b/CLI/CLI_old/code/setting/config.json deleted file mode 100644 index 789c3fa..0000000 --- a/CLI/CLI_old/code/setting/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "grobid_server": "http://localhost:8070", - "batch_size": 1000, - "sleep_time": 5, - "timeout": 60, - "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] -} diff --git a/CLI/README.md b/CLI/README.md deleted file mode 100644 index 1ff6457..0000000 --- a/CLI/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# SolarRAG CLI - -> **_Solar CLI:_** This CLI contains SolarFactRAG and SolarNaiveRAG pipeline for [solar-qa pipeline](https://github.com/oeg-upm/solar-qa-eval) - -## Install - -#### 1. install all python packages - -```console -pip install ollama networkx leidenalg cdlib python-igraph python-dotenv langchain huggingface_hub langchain-ollama==0.1.0 langchain-community==0.2.19 sentence-transformers==2.7.0 grobid-client-python==0.0.3 -``` - -#### 2. 
install ollama - -more details about ollama please visit the [offical ollama website](https://ollama.com/) - -```console -curl -fsSL https://ollama.com/install.sh | sh -``` - -#### 3. install grobid client - -please reference this part to the [offical grobid github page](https://grobid.readthedocs.io/en/latest/Install-Grobid/) - -## Run Solar CLI - -After installed all prerequisite libraries and software, you can simply run the cli by: - -```console -python SolarRAG.py --llm_id llama3.2:3b --embedding_id nomic-embed-text --input_file_path XXX/paper_2.pdf --prompt_file XXX/prompt.json --context_file_path XXX/test.json --rag_type fact -``` - -Here is a table that describe the parameters to run the FactRAG cli - -| Parameter | Definition | DataType | Example | -| -------- | ------- | ------- | ------- | -| llm_id | the parameter of which LLM model from ollama to use | String | [llama3.2:3b](https://ollama.com/library/llama3.2) | -| embedding_id | the parameter of which embedding model from ollama to use | String | [nomic-embed-text](https://ollama.com/library/nomic-embed-text) | -| input_file_path | path for input data, pdf file or extracted json file | String | ../paper_1.pdf | -| prompt_file | path for the prompt json file | String | ../code/prompt.json | -| context_file_path | path for save the output json file | String | ../FactRAG/context.json | -| rag_type | the type of rag pipeline, range=['fact', 'naive'] | String | fact | \ No newline at end of file diff --git a/CLI/Result_Example/SolarFactRAG_Example.json b/CLI/Result_Example/SolarFactRAG_Example.json deleted file mode 100644 index 6eac6b7..0000000 --- a/CLI/Result_Example/SolarFactRAG_Example.json +++ /dev/null @@ -1 +0,0 @@ -{"paper_title": "Photocatalytic Reduction of Greenhouse Gas CO2 to Fuel", "DOI": "10.1007/s10563-009-9065-9", "generation_model": "llama3.2:3b", "similarity_model": "nomic-embed-text", "similarity_metric": "Cosine_Similarity", "rag_type": "fact", "result": [{"question_category": 
"catalyst", "query": "What is the chemical name of the catalyst used in the experiment?", "generation": "###catalyst:\nTTIP (Titanium(IV) Isopropoxide).", "RAG_source": "generated_facts", "selected_answer": {"catalyst": "\nTTIP (Titanium(IV) Isopropoxide)."}, "evidences": [{"similairty_score": 0.6745214462280273, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.6460967063903809, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed.", "generated_facts": "Facts:\n1. Hydrocarbons are produced by Methane.\n2. Hydrocarbons are produced by Ethylene.\n3. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Methane.\n4. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Ethylene.\n5. The Dye-sensitized Cu-Fe/P25 photocatalyst harnesses Light energy.\n6. The Dye-sensitized Cu-Fe/P25 photocatalyst is enhanced by Sunlight.\n7. CO2 (carbon dioxide) is photoreduced by In situ IR spectroscopy.\n8. Methane is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst.\n9. Ethylene is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst."}, {"similairty_score": 0.6339464783668518, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. 
Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.6232354044914246, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced", "generated_facts": "Facts: \n1. The optical-fiber photoreactor is designed to be coated with Ag/TiO\u2082 photocatalyst-coated fibers.\n2. The optical-fiber photoreactor comprises nearly 120 Ag/TiO\u2082 photocatalyst-coated fibers.\n3. The photocatalyst-coated fibers are coated with Anatase phase.\n4. All photocatalysts contain the Anatase phase.\n5. Increasing UV light intensity increases methanol yield.\n6. The UV light intensity is set to 10 W/cm\u00b2.\n7. The Ag/TiO\u2082 photocatalyst is used under a light intensity of 10 W/cm\u00b2.\n8. The TiO\u2082-SiO\u2082 mixed oxide is doped with Cu metal and Fe metal.\n9. Photocatalysts exhibit substantial differences in hydrocarbon production.\n10. Hydrocarbon production is affected by product selectivity.\n11. Product selectivity is influenced by Cu metal and Fe metal.\n12. The TiO\u2082-SiO\u2082 mixed oxide doped with Cu and Fe metals produces methane.\n13. 
The TiO\u2082-SiO\u2082 mixed oxide doped with Cu and Fe metals also produces ethylene."}, {"similairty_score": 0.6223254799842834, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}]}, {"question_category": "co_catalyst", "query": "What is the chemical name of the co_catalyst used in the experiment?", "generation": "###co_catalyst:\nacac", "RAG_source": "generated_facts", "selected_answer": {"co_catalyst": "\nacac"}, "evidences": [{"similairty_score": 0.65687096118927, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. 
The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.6408281922340393, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed.", "generated_facts": "Facts:\n1. Hydrocarbons are produced by Methane.\n2. Hydrocarbons are produced by Ethylene.\n3. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Methane.\n4. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Ethylene.\n5. The Dye-sensitized Cu-Fe/P25 photocatalyst harnesses Light energy.\n6. The Dye-sensitized Cu-Fe/P25 photocatalyst is enhanced by Sunlight.\n7. CO2 (carbon dioxide) is photoreduced by In situ IR spectroscopy.\n8. Methane is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst.\n9. Ethylene is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst."}, {"similairty_score": 0.6391236186027527, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was", "generated_facts": "Facts: \n1. Thermocouple is placed at Reactor.\n2. Reactor was purged by CO2 gas.\n3. Reactor was purged by Distilled water.\n4. CO2 gas bubbled through Distilled water.\n5. Space velocity of CO2 gas and H2O vapor.\n6. Space velocity of CO2 gas and H2O vapor was maintained at nearly 0.72 h-1.\n7. Photo reactions were carried out at steady state by Reactor.\n8. Photo reactions lasted for 6-24 h.\n9. Catalysts were repeated two or three times on Reactor.\n10. Catalysts deactivation was found to be negligible.\n11. Outlet gases were analyzed by GC equipment.\n12. GC equipment was equipped with FID and Porapak Q column.\n13. TiO2 underwent thermal-hydrolysis.\n14. CO2 was photoreduced on Thermal-hydrolyzed TiO2."}, {"similairty_score": 0.6241745948791504, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. 
P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.6187817454338074, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}]}, {"question_category": "Light_source", "query": "From the choices provided, which light source is used in the experiment?", "generation": "###Light_source: 'Solar'", "RAG_source": "generated_facts", "selected_answer": {"Light_source": " 'Solar'"}, "evidences": [{"similairty_score": 0.6323468089103699, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. Temperature controllers control temperature."}, {"similairty_score": 0.6295710802078247, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 
3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar", "generated_facts": "Facts:\n1. Light is transmitted by a fiber-supported TiO2 film.\n2. A fiber-supported TiO2 film is inserted into fibers.\n3. Fibers are supported on circular plates.\n4. Circular plates have a diameter of 5.0 cm.\n5. An OFPR module is irradiated with artificial light or concentrated natural sunlight.\n6. Artificial light is supplied by the Exfo Acticure 4,000.\n7. The Exfo Acticure 4,000 uses an appropriate color filter.\n8. Light intensity can be measured with a Lumen meter (Exfo).\n9. Solar irradiates an OFPR module.\n10. Concentrated natural sunlight enters through a quartz window.\n\nInferred Facts:\n11. TiO2 is used in a fiber-supported TiO2 film.\n12. An OFPR processes and repairs fibers.\n13. A quartz window allows concentrated natural sunlight to enter."}, {"similairty_score": 0.629106342792511, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. 
The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.6268292665481567, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. 
CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.6255613565444946, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. 
The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}]}, {"question_category": "Lamp", "query": "From the choices provided, which lamp is used in the experiment?", "generation": "###Lamp: 'LED'", "RAG_source": "generated_facts", "selected_answer": {"Lamp": " 'LED'"}, "evidences": [{"similairty_score": 0.6355500221252441, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.6323181986808777, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}, {"similairty_score": 0.6160007119178772, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. 
CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.6134966611862183, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. 
The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.6098795533180237, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar", "generated_facts": "Facts:\n1. Light is transmitted by a fiber-supported TiO2 film.\n2. A fiber-supported TiO2 film is inserted into fibers.\n3. Fibers are supported on circular plates.\n4. Circular plates have a diameter of 5.0 cm.\n5. An OFPR module is irradiated with artificial light or concentrated natural sunlight.\n6. Artificial light is supplied by the Exfo Acticure 4,000.\n7. The Exfo Acticure 4,000 uses an appropriate color filter.\n8. Light intensity can be measured with a Lumen meter (Exfo).\n9. 
Solar irradiates an OFPR module.\n10. Concentrated natural sunlight enters through a quartz window.\n\nInferred Facts:\n11. TiO2 is used in a fiber-supported TiO2 film.\n12. An OFPR processes and repairs fibers.\n13. A quartz window allows concentrated natural sunlight to enter."}]}, {"question_category": "Reaction_medium", "query": "From the choices provided, which reaction medium is used in the experiment?", "generation": "###Reaction_medium: Liquid", "RAG_source": "generated_facts", "selected_answer": {"Reaction_medium": " Liquid"}, "evidences": [{"similairty_score": 0.6295759081840515, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. 
The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.5933018922805786, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. Temperature controllers control temperature."}, {"similairty_score": 0.5931262969970703, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 
2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.5924503207206726, "pdf_reference": "The TiO 2 solutions to be on optical fibers were prepared by the thermal hydrolysis method.Titanium (IV) butoxide and polyethylene glycol (PEG, molecular weight of 20,000, Merck, Darmstadt, Germany) were added to a 0.1 M nitric acid solution.The volume ratio of titanium butoxide to HNO 3 was 1:6, and the weight of PEG was 50 of that of TiO 2 .The appearance of pure TiO 2 solution was milky and slightly transparent.PEG was added to prevent cracking during the drying and calcination of the film.Furthermore, it tends to increase the viscosity of the solution as well as the uniformity TiO 2", "generated_facts": "Facts:\n1. Titanium (IV) butoxide has component TiO2.\n2. TiO2 will contain TiO2 solution.\n3. Polyethylene glycol (PEG) has molecular weight 20,000.\n4. Merck is obtained from PEG.\n5. Nitric acid solution is added to TiO2 with a volume ratio of 1:6 to Titanium (IV) butoxide.\n6. TiO2 solution has concentration of nitric acid in M.\n7. Thermal hydrolysis method involves hydrolysis reaction.\n8. TiO2 solutions are prepared by thermal hydrolysis method.\n9. 
Film formation involves drying and calcination of TiO2 solution.\n10. Drying is part of film formation process for TiO2 solution.\n11. Calcination is part of film formation process for TiO2 solution.\n12. Optical fibers will contain TiO2 solution.\n13. Titanium (IV) butoxide is added to 0.1 M nitric acid solution."}, {"similairty_score": 0.5891863703727722, "pdf_reference": "fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding photocatalyst into the 3 mM dye solution for 24 h.Subsequently, dye-adsorbed photocatalyst was rinsed with ethanol (99.5) to obtain a monolayer of dye on the photocatalyst surface.The resulting dye-adsorbed photocatalyst was finally put in an oven at 80 C for 30 min to remove the residue solvent on the surface of the photocatalyst. An optical-fiber photoreactor (OFPR) was designed and assembled to transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig.", "generated_facts": "Facts:\n1. Fibers are obtained by dip-coating method.\n2. Dye-adsorbed photocatalyst is obtained by dipping into 3 mM dye solution for 24 h.\n3. Dye-adsorbed photocatalist is rinsed with ethanol (99.5%).\n4. Dye-adsorbed photocatalist is put in an oven at 80 C for 30 min to remove residue solvent on the surface of the photocatalyst.\n5. Optical-fiber photoreactor (OFPR) is designed and assembled to transmit light to TiO 2 film.\n6. Photocatalyst is dipped into 3 mM dye solution for 24 h to obtain dye-adsorbed photocatalyst.\n7. Dye-adsorbed photocatalist is put in an oven at 80 C for 30 min to remove residue solvent on the surface of the photocatalyst (same as relationship 4).\n8. 
TiO 2 film transmits light from one side of the OFPR module to optical-fiber photoreactor (OFPR)."}]}, {"question_category": "Reactor_type", "query": "From the choices provided, which reactor type is used in the experiment?", "generation": "###Reactor_type: Fluidised-bed", "RAG_source": "generated_facts", "selected_answer": {"Reactor_type": " Fluidised-bed"}, "evidences": [{"similairty_score": 0.6250825524330139, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.5985174775123596, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was", "generated_facts": "Facts: \n1. Thermocouple is placed at Reactor.\n2. Reactor was purged by CO2 gas.\n3. Reactor was purged by Distilled water.\n4. CO2 gas bubbled through Distilled water.\n5. Space velocity of CO2 gas and H2O vapor.\n6. Space velocity of CO2 gas and H2O vapor was maintained at nearly 0.72 h-1.\n7. Photo reactions were carried out at steady state by Reactor.\n8. Photo reactions lasted for 6-24 h.\n9. Catalysts were repeated two or three times on Reactor.\n10. Catalysts deactivation was found to be negligible.\n11. Outlet gases were analyzed by GC equipment.\n12. GC equipment was equipped with FID and Porapak Q column.\n13. TiO2 underwent thermal-hydrolysis.\n14. 
CO2 was photoreduced on Thermal-hydrolyzed TiO2."}, {"similairty_score": 0.5907428860664368, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.5826471447944641, "pdf_reference": "equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was studied using diffusive reflectance infrared Fourier transform (DRIFT) under UV irradiation.Zero-grade air, high-purity He and ultra-purity CO 2 (99.999 v), from Air Products (USA) were used in the IR system.In order to reduce water interference, air and He were passed through a moisture trapper before entering the photoreactor. A high-temperature chamber (HVC, Harrick HVC-DRP-1) made of 316 stainless steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were", "generated_facts": "Here are the simple fact sentences constructed by combining the given relationships:\n\nFacts:\n1. The Fused Inertial Detector is equipped with a Porapak Q column.\n2. Titanium Dioxide was studied on Carbon Dioxide using DRIFT spectroscopy.\n3. Air Products supplied ultra-purity CO2 (99.999 v) for use in the experiment.\n4. Helium passed through a moisture trapper before entering the photoreactor.\n5. Zero-grade air passed through a moisture trapper before entering the photoreactor.\n6. High-purity helium was used in the experiment.\n7. The HVC-DRP-1 is made of 316 stainless steel.\n8. The photoreactor is used as a high-temperature chamber, Harrick model.\n9. The IR system is used in the photoreduction of CO2 on TiO2.\n10. 
Air was passed through a moisture trapper before entering the photoreactor.\n\nNote that I did not include relationship 13 as it is an implicit repetition of relationship 7 and does not provide any new information."}, {"similairty_score": 0.5698617696762085, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}]}, {"question_category": "Operation_mode", "query": "From the choices provided, which oepration mode is used in the experiment?", "generation": "###Operation_mode: Batch/Continuous", "RAG_source": "generated_facts", "selected_answer": {"Operation_mode": " Batch/Continuous"}, "evidences": [{"similairty_score": 0.5681795477867126, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.5528874397277832, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.5436047315597534, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}, {"similairty_score": 0.5275892615318298, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. 
The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.524968683719635, "pdf_reference": "Experimental", "generated_facts": "Let's construct some simple fact sentences using the provided relationships.\n\nSince we're working with a single entity, I'll use the first option: (Experimental, is, ). This implies that Experimental is a thing or concept that exists on its own.\n\nHere are a few examples of fact sentences:\n\n1. The experimental process is ongoing.\n2. Experimental methods have been used in various fields.\n3. The term \"experimental\" refers to a type of scientific inquiry.\n\nNow, let's try the second option: (Experimental, related to, ). This implies that Experimental has some connection or association with something else.\n\nHere are a few examples of fact sentences:\n\n1. Experimental results often require statistical analysis.\n2. The experimental approach is distinct from traditional methods.\n3. 
Experimental design plays a crucial role in scientific research.\n\nPlease let me know if you'd like to explore more relationships or provide additional context!"}]}]} \ No newline at end of file diff --git a/CLI/Result_Example/SolarNavieRAG_Example.json b/CLI/Result_Example/SolarNavieRAG_Example.json deleted file mode 100644 index 7551849..0000000 --- a/CLI/Result_Example/SolarNavieRAG_Example.json +++ /dev/null @@ -1 +0,0 @@ -{"paper_title": "Photocatalytic Reduction of Greenhouse Gas CO2 to Fuel", "DOI": "10.1007/s10563-009-9065-9", "generation_model": "llama3.2:3b", "similarity_model": "nomic-embed-text", "similarity_metric": "Cosine_Similarity", "rag_type": "naive", "result": [{"question_category": "catalyst", "query": "What is the chemical name of the catalyst used in the experiment?", "generation": "###catalyst: Titanium Dioxide (TiO2)", "RAG_source": "chunks", "selected_answer": {"catalyst": " Titanium Dioxide (TiO2)"}, "evidences": [{"similairty_score": 0.6278509497642517, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.6265729069709778, "pdf_reference": "Article_Title\nPhotocatalytic Reduction of Greenhouse Gas CO2 to Fuel\nAbstract"}, {"similairty_score": 0.6251746416091919, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The 
resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}, {"similairty_score": 0.6206226944923401, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}, {"similairty_score": 0.6131582260131836, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed."}]}, {"question_category": "co_catalyst", "query": "What is the chemical name of the co_catalyst used in the experiment?", "generation": "###co_catalyst: Cu-Fe", "RAG_source": "chunks", "selected_answer": {"co_catalyst": " Cu-Fe"}, "evidences": [{"similairty_score": 0.6458245515823364, "pdf_reference": "Article_Title\nPhotocatalytic Reduction of Greenhouse Gas CO2 to Fuel\nAbstract"}, {"similairty_score": 0.6371890902519226, "pdf_reference": 
"Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.626099705696106, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed."}, {"similairty_score": 0.6174551248550415, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was"}, {"similairty_score": 0.6073813438415527, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}]}, {"question_category": "Light_source", "query": "From the choices provided, which light source is used in the experiment?", "generation": "###Light_source: UV", "RAG_source": "chunks", "selected_answer": {"Light_source": " UV"}, "evidences": [{"similairty_score": 0.6839399337768555, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.6214293837547302, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The 
photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.6177785396575928, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.6095446944236755, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.6055726408958435, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 
3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar"}]}, {"question_category": "Lamp", "query": "From the choices provided, which lamp is used in the experiment?", "generation": "###Lamp: 'Mercury-Xenon'", "RAG_source": "chunks", "selected_answer": {"Lamp": " 'Mercury-Xenon'"}, "evidences": [{"similairty_score": 0.6403747797012329, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.6216245889663696, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on 
the sample cup.A temperature controller, which"}, {"similairty_score": 0.601848304271698, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.59084552526474, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5811028480529785, "pdf_reference": "Experimental"}]}, {"question_category": "Reaction_medium", "query": "From the choices provided, which reaction medium is used in the experiment?", "generation": "###Reaction_medium: Liquid", "RAG_source": "chunks", "selected_answer": {"Reaction_medium": " Liquid"}, "evidences": [{"similairty_score": 0.6020915508270264, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory 
during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5890271067619324, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5843930244445801, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5805729031562805, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a 
colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}, {"similairty_score": 0.5772931575775146, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}]}, {"question_category": "Reactor_type", "query": "From the choices provided, which reactor type is used in the experiment?", "generation": "###Reactor_type: Optical Fiber", "RAG_source": "chunks", "selected_answer": {"Reactor_type": " Optical Fiber"}, "evidences": [{"similairty_score": 0.6050081253051758, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5929175615310669, "pdf_reference": "color filter.The light 
intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5833992958068848, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5747902989387512, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was"}, {"similairty_score": 0.5674387216567993, "pdf_reference": "Experimental"}]}, {"question_category": "Operation_mode", "query": "From the choices provided, which oepration mode is used in the experiment?", "generation": "###Operation_mode: 'Continuous'", "RAG_source": "chunks", "selected_answer": {"Operation_mode": " 'Continuous'"}, "evidences": [{"similairty_score": 0.6241655349731445, "pdf_reference": "Experimental"}, {"similairty_score": 0.5862252116203308, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5714736580848694, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5229113101959229, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and 
heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.49779558181762695, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}]}]} \ No newline at end of file diff --git a/CLI/Result_Spec.md b/CLI/Result_Spec.md deleted file mode 100644 index 0b0b001..0000000 --- a/CLI/Result_Spec.md +++ /dev/null @@ -1,78 +0,0 @@ -# SolarRAG-CLI Result Specification - -## Result Specification Table - -Here is the table that describe the saved output json file: - -| Category | Definition | DataType | Example | -| -------- | ------- | ------- | ------- | -| paper_title | The title of the paper | String | Effect of silver doping on the TiO2 for photocatalytic reduction of CO2 | -| DOI | The DOI of the paper(extracted from the paper pdf file) | String | 10.1016/j.apcatb.2010.02.030 | -| generation_model | The ollama model id for llm generation | String | llama3.2:3b | -| similarity_model | The ollama model id for text embedding | String | nomic-embed-text | -| similarity_metric | The metric for calculating 
the similarity between embeddings | String | Cosine_Similarity | -| rag_type | the type of rag pipeline, range=['fact', 'naive'] | String | fact | -| result | The list of generation result from the llm | List | Details given velow | - -The details for `result` part of the saved output json file: - -| Category | Definition | DataType | Example | Range(if applicable) | -| -------- | ------- | ------- | ------- | ------- | -| question_category | The category of the question for the llm | String | Light_source | [catalyst, co_catalyst, Light_source, Lamp, Reaction_medium, Reactor_type, Operation_mode] | -| query | The query for the llm | String | What is the Light_source used in the experiment? | Not applicable | -| generation | The generated answer from the llm | String | ###Light_source: UV | Not applicable | -| RAG_source | The information source provided for the RAG pipeline | String | generated_facts | Not applicable | -| selected_answer | The corresponding answer from the selection of choices | String | UV | Details are given below | -| evidences | The list of evidences for the RAG pipeline | list | Details given below | Not applicable | - -The range for `selected_answer` with correspongind `question_category`: -| question_category | Range(if applicable) | -| -------- | ------- | -| catalyst | Not applicable | -| co_catalyst | Not applicable | -| Light_source | 'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator', 'Do not Know' | -| Lamp | 'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator', 'Do not Know' | -| Reaction_medium | 'Liquid', 'Gas', 'Do not Know' | -| Reactor_type | 'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed', 'Do not Know' | -| Operation_mode | 'Batch', 'Continuous', 'Batch/Continuous', 'Do not Know' | - -The details for `evidence` part of the `result` from the output json: - -| Category | Definition | DataType | Example | -| -------- | 
------- | ------- | ------- | -| similairty_score | The similairty score between the query and correspond text provided to the embedding model | Float | 0.6205 | -| pdf_reference | The original text that are extracted from the paper | String | in the conventional focusing... | -| generated_fact | The generated facts based on the pdf_reference | String | Facts: 1. A homemade apparatus is used... | - -> **_NOTE:_** `generated_fact` only existed when the type of RAG pipeline is `fact` RAG. - -## Example of the SolarRAG-CLI result - -*Example:* -```json - "paper_title": "Effect of silver doping on the TiO2 for photocatalytic reduction of CO2", - "DOI": "10.1016/j.apcatb.2010.02.030", - "generation_model": "llama3.2:3b", - "similarity_model": "nomic-embed-text", - "similarity_metric": "Cosine_Similarity", - "rag_type": "fact", - "result": [ - { - "question_category": "catalyst", - "query": "What is the chemical name of the catalyst used in the experiment", - "generation": "###catalyst: Titanium dioxide (TiO2)", - "RAG_source": "generated_facts", - "selected_answer": { - "catalyst": " Titanium dioxide (TiO2)" - }, - "evidences": [ - { - "similairty_score": 0.6646425724029541, - "pdf_reference": "methods were described in our previous publication 39.It is important to minimize the influence of transport phenomena during kinetic measurements.The elimination of CO 2 diffusion from the bulk of gas through the gas-liquid interface in a laboratory batch slurry reactor was accomplished by saturating the liquid with pure CO 2 before the reaction had been started 4,11.Catalyst loading of 1 g dm -3 was chosen to avoid concentration gradients in the bulk of stirred liquid with TiO 2 suspension due to the scattering effect of light caused by the high TiO 2 concentration 11,16,40,41.The", - "generated_facts": "Facts:\n1. The methods described in publication 39 were used for kinetic measurements.\n2. Kinetic measurements should minimize influence.\n3. 
CO2 diffusion from the bulk of gas through the gas-liquid interface was accomplished by saturating the liquid with pure CO2.\n4. Saturating the liquid with pure CO2 resulted in a high concentration of TiO2 suspension due to scattering effects of light.\n5. The scattering effect of light caused concentration gradients in the reactor.\n6. Concentration gradients in the reactor were avoided by choosing an optimal catalyst loading.\n7. An optimal catalyst loading was used to eliminate CO2 diffusion from the bulk of gas.\n8. Eliminating CO2 diffusion from the bulk of gas resulted in a laboratory batch slurry reactor being used for reaction.\n9. A laboratory batch slurry reactor was used to accomplish elimination of CO2 diffusion.\n10. The elimination of CO2 diffusion was achieved by using a TiO2 suspension that scattered light." - }, - ... - ] - } - ] -``` \ No newline at end of file diff --git a/CLI/code/FactsRAG.py b/CLI/code/FactsRAG.py deleted file mode 100644 index c0cd9dd..0000000 --- a/CLI/code/FactsRAG.py +++ /dev/null @@ -1,280 +0,0 @@ -from dotenv import load_dotenv -from cdlib import algorithms -import networkx as nx -import os -import json -import argparse -from collections import OrderedDict -from process import * - - -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.prompts import PromptTemplate -from langchain.retrievers import ContextualCompressionRetriever -from langchain.docstore.document import Document -from langchain_ollama import OllamaEmbeddings -from langchain_ollama import ChatOllama -from sentence_transformers import util - - -def get_text(data): - title_list = ["Article_Title", "Abstract", "Experimental", "Results and discussion"] - context = "" - context_list = [] - for section in data: - if section["title"] in title_list: - # self.context_list.append(f"{section["title"]}: {section["content"]}") - context += section["title"] - context += "\n" - context += section["content"] - context += "\n" - return context 
- - -def get_title(data): - for section in data: - if section["title"] == "Article_Title": - return section["content"] - else: - pass - return "None Given" - - -def get_doi(data): - for section in data: - if section["title"] == "Doi": - return section["content"] - else: - pass - return "None Given" - - -def clean_response(gen, category): - print(gen) - res = {} - s, e = gen.split(":") - res[category] = e - print(res) - return res - - -class SolarFact: - def __init__(self, llm_id, embedding_id, input_file_path=str(), context_file_path=str()): - self.llm_id = llm_id - self.embedding_id = embedding_id - self.input_file_path = input_file_path - self.context_file_path = context_file_path - self._get_llm() - self._get_documents() - self.context_result = { - "paper_title": self.paper_title, - "DOI": self.doi, - "generation_model": self.llm_id, - "similarity_model": self.embedding_id, - "similarity_metric": "Cosine_Similarity", - "result": [] - } - self.chunks, self.entities, self.relations, self.facts = self._prepare_pipeline() - - def _get_llm(self): - self.llm = ChatOllama(model=self.llm_id, temperature=0) - self.embeddings = OllamaEmbeddings(model=self.embedding_id) - - def _get_documents(self): - if self.input_file_path[-3:] == "pdf": - # print(1) - data = process_paper(self.input_file_path) - else: - with open(self.input_file_path, "rb") as f: - data = json.load(f) - self.paper_title = get_title(data) - self.doi = get_doi(data) - self.documents = get_text(data) - - def _split_documents_into_chunks(self, chunk_size=600, overlap_size=100): - documents = [Document(page_content=self.documents)] - text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size) - chunks = text_splitter.split_documents(documents) - return chunks - - def _extract_entities_from_chunks(self, chunks): - entities = {} - title_list = ['Article Title', "Abstract", "Experimental", "Results and discussion"] - print(f"------Start extracting entities from 
chunks------") - print(f"------Total chunk count: {len(chunks)}------") - for index, chunk in enumerate(chunks): - if chunk in title_list: - pass - else: - response = self.llm.invoke( - [ - {"role": "system", "content": "Extract all the entities from the following text."}, - {"role": "user", "content": chunk.page_content} - ] - ) - entities_for_chunks = response.content - entities[index] = entities_for_chunks - print(f"------Entities extraction is done------") - return entities - - def _extract_relationships_from_chunks_and_entities(self, chunks, entities): - relations = {} - title_list = ['Article Title', "Abstract", "Experimental", "Results and discussion"] - print(f"------Start extracting entities from chunks------") - print(f"------Total chunk count: {len(chunks)}------") - for index, chunk in enumerate(chunks): - if chunk in title_list: - pass - else: - response = self.llm.invoke( - [ - {"role": "system", "content": "Extract all the relationship from the following context and provided entities in the format of triples, (subject, predicate, object)"}, - {"role": "user", "content": f"Context: {chunk.page_content}, Entities: {entities[index]}"} - ] - ) - relation = response.content - relations[index] = relation - print(f"------Relationships extraction is done------") - return relations - - def _generate_facts_from_relations(self, chunks, relations): - facts = {} - print(f"------Start generating factual sentences------") - for index, chunk in enumerate(chunks): - response = self.llm.invoke( - [ - {"role": "system", "content": "Construct simple fact sentences by combining the following relationships after the \"Facts:\" word."}, - {"role": "user", "content": f" Relations: {relations[index]}"} - ] - ) - fact = response.content - facts[index] = fact - print(f"------Facts generation is done------") - return facts - - def _cal_fact_cosine_similairty(self, facts, prompt, category): - if isinstance(facts, list): - facts = {index: value for index, value in 
enumerate(facts)} - sim_dict = {} - for key, fact in facts.items(): - fact_embed = self.embeddings.embed_query(fact) - query_embed = self.embeddings.embed_query(prompt) - cosine_similarity = util.cos_sim(fact_embed, query_embed) - sim_dict[key] = cosine_similarity[0].detach().item() - sorted_sim_dict = dict(sorted(sim_dict.items(), key=lambda item: item[1], reverse=True)) - return sorted_sim_dict - - def _generate_final_answer(self, sort_sim_dict, k, text, prompt, category, selection=None): - indexes = list(sort_sim_dict.keys())[:k] - context = "" - for ind in indexes: - context += text[ind] - if selection is None: - final_response = self.llm.invoke( - [ - {"role": "system", "content": f"Answer the following query based on the provided summary of facts. Please indicate the answer following the structure of \"###{category}:\" YOUR ANSWER"}, - {"role": "user", "content": f"Facts: {context} Query : {prompt}"} - ] - ) - - else: - final_response = self.llm.invoke( - [ - {"role": "system", "content": f"Answer the following query by selecting only one of the choices based on the provided summary of facts. 
Please indicate the answer following the structure of \"###{category}:\" YOUR ANSWER"}, - {"role": "user", "content": f"Query: {prompt} Choices: {selection} Facts: {context}"} - ] - ) - return indexes, final_response.content - - def _prepare_pipeline(self): - chunks = self._split_documents_into_chunks() - - entities = self._extract_entities_from_chunks(chunks) - - relations = self._extract_relationships_from_chunks_and_entities(chunks, entities) - - facts = self._generate_facts_from_relations(chunks, relations) - - return chunks, entities, relations, facts - - - def graphrag_pipeline(self, k, prompt, category): - print("---------------------running factrag pipeline---------------------") - ## Prepare data - #self.chunks, self.entities, self.relations, self.facts - - ## Get choices - if category in ["catalyst", "co_catalyst"]: - selection = None - elif category == "Light_source": - selection = "'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator', 'Do not Know'" - elif category == "Lamp": - selection = "'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator', 'Do not Know'" - elif category == "Reaction_medium": - selection = "'Liquid', 'Gas', 'Do not Know'" - elif category == "Reactor_type": - selection = "'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed', 'Do not Know'" - elif category == 'Operation_mode': - selection = "'Batch', 'Continuous', 'Batch/Continuous', 'Do not Know'" - - ## Run Pipeline - sim_dict = self._cal_fact_cosine_similairty(self.facts, prompt, category) - - indexes, final_response = self._generate_final_answer(sim_dict, k, self.facts, prompt, category, selection) - - evidences = [] - - for index in indexes: - evidence = { - "similairty_score": sim_dict[index], - "pdf_reference": self.chunks[index].page_content, - "generated_facts": self.facts[index] - } - evidences.append(evidence) - temp = { - "question_category": category, - "query": prompt, - 
"generation": final_response, - "RAG_source": "generated_facts", - "selected_answer": clean_response(final_response, category), - "evidences": evidences - } - return temp - - -def get_parser(): - parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") - parser.add_argument('--llm_id', type=str, default=True, help="the parameter of which LLM model from ollama to use") - parser.add_argument('--embedding_id', type=str, default=True, help="the parameter of which embedding model from ollama to use") - parser.add_argument('--input_file_path', type=str, default=True, help="path for input data, pdf file or extracted json file") - parser.add_argument('--prompt_file', help='queries', type=str) - parser.add_argument('--context_file_path', type=str, default=True, help="save context file") - return parser - - -def main(): - parser = get_parser() - args = parser.parse_args() - args_dict = vars(args) - prompt_file = args_dict["prompt_file"] - del args_dict["prompt_file"] - factrag = SolarFact(**args_dict) - context_result = { - "paper_title": factrag.paper_title, - "DOI": factrag.doi, - "generation_model": factrag.llm_id, - "similarity_model": factrag.embedding_id, - "similarity_metric": "Cosine_Similarity", - "result": [] - } - with open(prompt_file, "rb") as f: - query_data = json.load(f) - for key, value in query_data.items(): - temp = factrag.graphrag_pipeline(5, value, key) - context_result["result"].append(temp) - # print(context_result) - with open(factrag.context_file_path, "w") as f: - json.dump(context_result, f) - - -main() \ No newline at end of file diff --git a/CLI/code/GraphRAG/graphrag.ipynb b/CLI/code/GraphRAG/graphrag.ipynb deleted file mode 100644 index afc5beb..0000000 --- a/CLI/code/GraphRAG/graphrag.ipynb +++ /dev/null @@ -1,626 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "2c1add81-a960-4df4-84ff-db1778521daa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mCollecting langchain-experimental\n", - " Downloading langchain_experimental-0.3.3-py3-none-any.whl (208 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m209.0/209.0 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting langchain-community<0.4.0,>=0.3.0\n", - " Downloading langchain_community-0.3.5-py3-none-any.whl (2.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", - "\u001b[?25hCollecting langchain-core<0.4.0,>=0.3.15\n", - " Downloading langchain_core-0.3.15-py3-none-any.whl (408 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m408.7/408.7 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting aiohttp<4.0.0,>=3.8.3\n", - " Downloading aiohttp-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting dataclasses-json<0.7,>=0.5.7\n", - " Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", - "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental) (6.0)\n", - "Collecting pydantic-settings<3.0.0,>=2.4.0\n", - " Downloading pydantic_settings-2.6.1-py3-none-any.whl (28 kB)\n", - "Requirement 
already satisfied: SQLAlchemy<2.0.36,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.4.41)\n", - "Collecting langchain<0.4.0,>=0.3.6\n", - " Downloading langchain-0.3.7-py3-none-any.whl (1.0 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting langsmith<0.2.0,>=0.1.125\n", - " Downloading langsmith-0.1.142-py3-none-any.whl (306 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m306.7/306.7 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental) (2.28.1)\n", - "Collecting tenacity!=8.4.0,<10,>=8.1.0\n", - " Downloading tenacity-9.0.0-py3-none-any.whl (28 kB)\n", - "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.22.4)\n", - "Collecting httpx-sse<0.5.0,>=0.4.0\n", - " Downloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)\n", - "Collecting pydantic<3.0.0,>=2.5.2\n", - " Downloading pydantic-2.9.2-py3-none-any.whl (434 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m434.9/434.9 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain-experimental) (23.2)\n", - "Collecting typing-extensions>=4.7\n", - " Downloading typing_extensions-4.12.2-py3-none-any.whl (37 kB)\n", - "Collecting jsonpatch<2.0,>=1.33\n", - " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 
kB)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (22.1.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.2.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.3.1)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (6.0.2)\n", - "Collecting yarl<2.0,>=1.12.0\n", - " Downloading yarl-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (318 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.7/318.7 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting aiohappyeyeballs>=2.3.0\n", - " Downloading aiohappyeyeballs-2.4.3-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (4.0.2)\n", - "Collecting typing-inspect<1,>=0.4.0\n", - " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", - "Collecting marshmallow<4.0.0,>=3.18.0\n", - " Downloading marshmallow-3.23.1-py3-none-any.whl (49 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.5/49.5 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting jsonpointer>=1.9\n", - " Downloading jsonpointer-3.0.0-py2.py3-none-any.whl (7.6 kB)\n", - "Collecting langchain-text-splitters<0.4.0,>=0.3.0\n", - " Downloading 
langchain_text_splitters-0.3.2-py3-none-any.whl (25 kB)\n", - "Collecting requests-toolbelt<2.0.0,>=1.0.0\n", - " Downloading requests_toolbelt-1.0.0-py2.py3-none-any.whl (54 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.5/54.5 kB\u001b[0m \u001b[31m21.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting orjson<4.0.0,>=3.9.14\n", - " Downloading orjson-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (142 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting httpx<1,>=0.23.0\n", - " Downloading httpx-0.27.2-py3-none-any.whl (76 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting pydantic-core==2.23.4\n", - " Downloading pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hCollecting annotated-types>=0.6.0\n", - " Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", - "Collecting python-dotenv>=0.21.0\n", - " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (2.1.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (2022.6.15.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in 
/opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (3.3)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.26.11)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<2.0.36,>=1.4->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.1.3)\n", - "Collecting httpcore==1.*\n", - " Downloading httpcore-1.0.6-py3-none-any.whl (78 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.0/78.0 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.3.0)\n", - "Requirement already satisfied: anyio in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (3.6.1)\n", - "Collecting h11<0.15,>=0.13\n", - " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m15.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: mypy-extensions>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community<0.4.0,>=0.3.0->langchain-experimental) (1.0.0)\n", - "Collecting propcache>=0.2.0\n", - " Downloading propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (208 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.9/208.9 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hInstalling collected packages: typing-extensions, tenacity, python-dotenv, propcache, orjson, marshmallow, jsonpointer, httpx-sse, h11, annotated-types, aiohappyeyeballs, yarl, typing-inspect, requests-toolbelt, pydantic-core, jsonpatch, httpcore, pydantic, httpx, dataclasses-json, aiohttp, pydantic-settings, langsmith, langchain-core, langchain-text-splitters, langchain, langchain-community, langchain-experimental\n", - " Attempting uninstall: typing-extensions\n", - " Found existing installation: typing_extensions 4.3.0\n", - " Uninstalling typing_extensions-4.3.0:\n", - " Successfully uninstalled typing_extensions-4.3.0\n", - " Attempting uninstall: yarl\n", - " Found existing installation: yarl 1.7.2\n", - " Uninstalling yarl-1.7.2:\n", - " Successfully uninstalled yarl-1.7.2\n", - " Attempting uninstall: aiohttp\n", - " Found existing installation: aiohttp 3.8.1\n", - " Uninstalling aiohttp-3.8.1:\n", - " Successfully uninstalled aiohttp-3.8.1\n", - "Successfully installed aiohappyeyeballs-2.4.3 aiohttp-3.10.10 annotated-types-0.7.0 dataclasses-json-0.6.7 h11-0.14.0 httpcore-1.0.6 httpx-0.27.2 httpx-sse-0.4.0 jsonpatch-1.33 jsonpointer-3.0.0 langchain-0.3.7 langchain-community-0.3.5 langchain-core-0.3.15 langchain-experimental-0.3.3 langchain-text-splitters-0.3.2 langsmith-0.1.142 marshmallow-3.23.1 orjson-3.10.11 propcache-0.2.0 pydantic-2.9.2 pydantic-core-2.23.4 pydantic-settings-2.6.1 python-dotenv-1.0.1 requests-toolbelt-1.0.0 tenacity-9.0.0 typing-extensions-4.12.2 typing-inspect-0.9.0 yarl-1.17.1\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. 
The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: langchain in /opt/conda/lib/python3.10/site-packages (0.3.7)\n", - "Collecting langchain_groq\n", - " Downloading langchain_groq-0.2.1-py3-none-any.whl (14 kB)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.4.41)\n", - "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.1.142)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.10/site-packages (from langchain) (3.10.10)\n", - "Requirement already satisfied: langchain-core<0.4.0,>=0.3.15 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.3.15)\n", - "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.22.4)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (4.0.2)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.3.2)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (9.0.0)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.9.2)\n", - "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain) (6.0)\n", - "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.28.1)\n", - "Collecting groq<1,>=0.4.1\n", - " Downloading groq-0.11.0-py3-none-any.whl (106 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.5/106.5 kB\u001b[0m 
\u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.2)\n", - "Requirement already satisfied: yarl<2.0,>=1.12.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.17.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.3)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.2.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.1.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", - "Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (1.3.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (0.27.2)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (4.12.2)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (3.6.1)\n", - "Collecting distro<2,>=1.7.0\n", - " Downloading distro-1.9.0-py3-none-any.whl (20 kB)\n", - "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain) (23.2)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain) (1.33)\n", - "Requirement already satisfied: 
orjson<4.0.0,>=3.9.14 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.11)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (1.0.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.1.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (1.26.11)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2022.6.15.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.3)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (1.1.3)\n", - "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (1.0.6)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (0.14.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.15->langchain) (3.0.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /opt/conda/lib/python3.10/site-packages (from yarl<2.0,>=1.12.0->aiohttp<4.0.0,>=3.8.3->langchain) (0.2.0)\n", - "Installing 
collected packages: distro, groq, langchain_groq\n", - "Successfully installed distro-1.9.0 groq-0.11.0 langchain_groq-0.2.1\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: langchain-community in /opt/conda/lib/python3.10/site-packages (0.3.5)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (9.0.0)\n", - "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (2.6.1)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (3.10.10)\n", - "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (1.22.4)\n", - "Requirement already satisfied: langchain<0.4.0,>=0.3.6 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (0.3.7)\n", - "Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (0.4.0)\n", - "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (0.6.7)\n", - "Requirement already satisfied: langchain-core<0.4.0,>=0.3.15 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (0.3.15)\n", - "Requirement already satisfied: requests<3,>=2 in 
/opt/conda/lib/python3.10/site-packages (from langchain-community) (2.28.1)\n", - "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (0.1.142)\n", - "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (6.0)\n", - "Requirement already satisfied: SQLAlchemy<2.0.36,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain-community) (1.4.41)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (22.1.0)\n", - "Requirement already satisfied: yarl<2.0,>=1.12.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.17.1)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (2.4.3)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.0.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.1)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.2.0)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (4.0.2)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /opt/conda/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/conda/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.23.1)\n", - "Requirement already satisfied: 
langchain-text-splitters<0.4.0,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from langchain<0.4.0,>=0.3.6->langchain-community) (0.3.2)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/conda/lib/python3.10/site-packages (from langchain<0.4.0,>=0.3.6->langchain-community) (2.9.2)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain-community) (4.12.2)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain-community) (1.33)\n", - "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain-community) (23.2)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community) (3.10.11)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community) (1.0.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-community) (0.27.2)\n", - "Requirement already satisfied: python-dotenv>=0.21.0 in /opt/conda/lib/python3.10/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community) (1.0.1)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (2.1.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (2022.6.15.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (1.26.11)\n", - "Requirement already satisfied: 
idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain-community) (3.3)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<2.0.36,>=1.4->langchain-community) (1.1.3)\n", - "Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community) (1.3.0)\n", - "Requirement already satisfied: anyio in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community) (3.6.1)\n", - "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community) (1.0.6)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-community) (0.14.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.15->langchain-community) (3.0.0)\n", - "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.6->langchain-community) (2.23.4)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.6->langchain-community) (0.7.0)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /opt/conda/lib/python3.10/site-packages (from yarl<2.0,>=1.12.0->aiohttp<4.0.0,>=3.8.3->langchain-community) (0.2.0)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and 
conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: langchain-core in /opt/conda/lib/python3.10/site-packages (0.3.15)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (1.33)\n", - "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (6.0)\n", - "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (0.1.142)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (2.9.2)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (4.12.2)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (9.0.0)\n", - "Requirement already satisfied: packaging<25,>=23.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core) (23.2)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core) (3.0.0)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (1.0.0)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) 
(3.10.11)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (0.27.2)\n", - "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core) (2.28.1)\n", - "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core) (2.23.4)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.5.2->langchain-core) (0.7.0)\n", - "Requirement already satisfied: anyio in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (3.6.1)\n", - "Requirement already satisfied: idna in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (3.3)\n", - "Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (1.3.0)\n", - "Requirement already satisfied: certifi in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (2022.6.15.1)\n", - "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (1.0.6)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.2.0,>=0.1.125->langchain-core) (0.14.0)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core) (1.26.11)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core) 
(2.1.1)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: huggingface-hub in /opt/conda/lib/python3.10/site-packages (0.20.2)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (2023.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (23.2)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (4.64.1)\n", - "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (6.0)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (2.28.1)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (4.12.2)\n", - "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub) (3.13.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub) (2022.6.15.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub) (3.3)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub) 
(1.26.11)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub) (2.1.1)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mCollecting typing-extensions==4.7.1\n", - " Downloading typing_extensions-4.7.1-py3-none-any.whl (33 kB)\n", - "Installing collected packages: typing-extensions\n", - " Attempting uninstall: typing-extensions\n", - " Found existing installation: typing_extensions 4.12.2\n", - " Uninstalling typing_extensions-4.12.2:\n", - " Successfully uninstalled typing_extensions-4.12.2\n", - "Successfully installed typing-extensions-4.7.1\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. 
If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mCollecting sentence-transformers==2.7.0\n", - " Downloading sentence_transformers-2.7.0-py3-none-any.whl (171 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m171.5/171.5 kB\u001b[0m \u001b[31m16.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: huggingface-hub>=0.15.1 in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (0.20.2)\n", - "Requirement already satisfied: Pillow in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (9.2.0)\n", - "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (4.36.2)\n", - "Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (1.1.2)\n", - "Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (1.22.4)\n", - "Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (1.9.1)\n", - "Requirement already satisfied: torch>=1.11.0 in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (2.1.2)\n", - "Requirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from sentence-transformers==2.7.0) (4.64.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (2023.10.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (6.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence-transformers==2.7.0) 
(4.7.1)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (2.28.1)\n", - "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (3.13.1)\n", - "Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (23.2)\n", - "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (3.1.2)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (2.18.1)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (12.1.105)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (8.9.2.26)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (12.1.105)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (12.1.3.1)\n", - "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (2.8.6)\n", 
- "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (12.1.105)\n", - "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (1.10.1)\n", - "Requirement already satisfied: triton==2.1.0 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (2.1.0)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (12.1.105)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /opt/conda/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers==2.7.0) (11.4.5.107)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /opt/conda/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers==2.7.0) (12.3.101)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers==2.7.0) (0.4.1)\n", - "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers==2.7.0) (2023.12.25)\n", - "Requirement already satisfied: tokenizers<0.19,>=0.14 in /opt/conda/lib/python3.10/site-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers==2.7.0) (0.15.0)\n", - "Requirement already satisfied: joblib>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->sentence-transformers==2.7.0) (1.1.0)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from 
scikit-learn->sentence-transformers==2.7.0) (3.1.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.11.0->sentence-transformers==2.7.0) (2.1.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (1.26.11)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (2.1.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (3.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers==2.7.0) (2022.6.15.1)\n", - "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.11.0->sentence-transformers==2.7.0) (1.2.1)\n", - "Installing collected packages: sentence-transformers\n", - "Successfully installed sentence-transformers-2.7.0\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. 
If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: bitsandbytes in /opt/conda/lib/python3.10/site-packages (0.42.0)\n", - "Requirement already satisfied: accelerate in /opt/conda/lib/python3.10/site-packages (0.26.1)\n", - "Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from bitsandbytes) (1.9.1)\n", - "Requirement already satisfied: safetensors>=0.3.1 in /opt/conda/lib/python3.10/site-packages (from accelerate) (0.4.1)\n", - "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from accelerate) (23.2)\n", - "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from accelerate) (6.0)\n", - "Requirement already satisfied: torch>=1.10.0 in /opt/conda/lib/python3.10/site-packages (from accelerate) (2.1.2)\n", - "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from accelerate) (1.22.4)\n", - "Requirement already satisfied: huggingface-hub in /opt/conda/lib/python3.10/site-packages (from accelerate) (0.20.2)\n", - "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from accelerate) (5.9.2)\n", - "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.13.1)\n", - "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.1.2)\n", - "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.0.106)\n", - "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (8.9.2.26)\n", - "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.3.1)\n", - "Requirement already 
satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (2023.10.0)\n", - "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (11.4.5.107)\n", - "Requirement already satisfied: triton==2.1.0 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (2.1.0)\n", - "Requirement already satisfied: nvidia-nccl-cu12==2.18.1 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (2.18.1)\n", - "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (4.7.1)\n", - "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (1.10.1)\n", - "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (11.0.2.54)\n", - "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (12.1.105)\n", - "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (2.8.6)\n", - "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (10.3.2.106)\n", - "Requirement already satisfied: nvidia-nvjitlink-cu12 in /opt/conda/lib/python3.10/site-packages (from 
nvidia-cusolver-cu12==11.4.5.107->torch>=1.10.0->accelerate) (12.3.101)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub->accelerate) (2.28.1)\n", - "Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub->accelerate) (4.64.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (1.26.11)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (2.1.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (3.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub->accelerate) (2022.6.15.1)\n", - "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.10.0->accelerate) (1.2.1)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. 
If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mCollecting grobid-client-python\n", - " Downloading grobid_client_python-0.0.8-py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from grobid-client-python) (2.28.1)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->grobid-client-python) (1.26.11)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->grobid-client-python) (3.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->grobid-client-python) (2022.6.15.1)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->grobid-client-python) (2.1.1)\n", - "Installing collected packages: grobid-client-python\n", - "Successfully installed grobid-client-python-0.0.8\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (2.8.6)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mRequirement already satisfied: langchain in /opt/conda/lib/python3.10/site-packages (0.3.7)\n", - "Requirement already satisfied: langchain_groq in /opt/conda/lib/python3.10/site-packages (0.2.1)\n", - "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (9.0.0)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.4.41)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.10/site-packages (from langchain) (3.10.10)\n", - "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.9.2)\n", - "Requirement already satisfied: langchain-core<0.4.0,>=0.3.15 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.3.15)\n", - "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.10/site-packages (from langchain) (2.28.1)\n", - "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.3.2)\n", - "Requirement already satisfied: langsmith<0.2.0,>=0.1.17 in /opt/conda/lib/python3.10/site-packages (from langchain) (0.1.142)\n", - "Requirement already satisfied: PyYAML>=5.3 in /opt/conda/lib/python3.10/site-packages (from langchain) (6.0)\n", - "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.10/site-packages (from langchain) (1.22.4)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in 
/opt/conda/lib/python3.10/site-packages (from langchain) (4.0.2)\n", - "Requirement already satisfied: groq<1,>=0.4.1 in /opt/conda/lib/python3.10/site-packages (from langchain_groq) (0.11.0)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.1.0)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.2.0)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.3)\n", - "Requirement already satisfied: yarl<2.0,>=1.12.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.17.1)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.2)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (4.7.1)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (0.27.2)\n", - "Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (1.3.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (3.6.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /opt/conda/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (1.9.0)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain) (1.33)\n", - "Requirement already satisfied: 
packaging<25,>=23.2 in /opt/conda/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.15->langchain) (23.2)\n", - "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (1.0.0)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /opt/conda/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.17->langchain) (3.10.11)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.23.4 in /opt/conda/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (1.26.11)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.1.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2022.6.15.1)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.3)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (1.1.3)\n", - "Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (1.0.6)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (0.14.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /opt/conda/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.15->langchain) (3.0.0)\n", - "Requirement already 
satisfied: propcache>=0.2.0 in /opt/conda/lib/python3.10/site-packages (from yarl<2.0,>=1.12.0->aiohttp<4.0.0,>=3.8.3->langchain) (0.2.0)\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m\u001b[33mWARNING: The directory '/home/jovyan/.cache/pip' or its parent directory is not owned or is not writable by the current user. The cache has been disabled. Check the permissions and owner of that directory. If executing pip with sudo, you should use sudo's -H flag.\u001b[0m\u001b[33m\n", - "\u001b[0mCollecting json-repair\n", - " Downloading json_repair-0.30.1-py3-none-any.whl (18 kB)\n", - "Installing collected packages: json-repair\n", - "Successfully installed json-repair-0.30.1\n", - "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", - "\u001b[0m" - ] - } - ], - "source": [ - "!pip install langchain-experimental\n", - "!pip install --upgrade langchain langchain_groq\n", - "!pip install --upgrade langchain-community\n", - "!pip install --upgrade langchain-core\n", - "!pip install huggingface-hub\n", - "!pip install typing-extensions==4.7.1 --upgrade\n", - "!pip install sentence-transformers==2.7.0\n", - "!pip install bitsandbytes accelerate\n", - "!pip install grobid-client-python\n", - "!pip install networkx\n", - "!pip install --upgrade langchain langchain_groq\n", - "!pip install json-repair" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "84c2894a-2638-4505-ba87-87dffba750e0", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from langchain_experimental.graph_transformers import LLMGraphTransformer\n", - "from langchain_groq import ChatGroq\n", - "import networkx as nx\n", - "from langchain.chains import GraphQAChain\n", - "from langchain_core.documents import Document\n", - "from langchain_community.graphs.networkx_graph import NetworkxEntityGraph" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "882df3a4-6a6c-4eb8-b285-18777af3a6dc", - "metadata": {}, - "outputs": [], - "source": [ - "os.environ[\"GROQ_API_KEY\"] = \"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\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "576bc8ab-ef1b-482f-95e1-fe07efb15afd", - "metadata": {}, - "outputs": [], - "source": [ - "llm = ChatGroq(temperature=0, model_name=\"llama-3.1-70b-versatile\")\n", - "\n", - "text = \"\"\"\n", - "Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n", - "She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two 
scientific fields.\n", - "Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\n", - "She was, in 1906, the first woman to become a professor at the University of Paris. \n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "dc4f4357-5026-4ad0-8e91-6175801f6b1b", - "metadata": {}, - "outputs": [], - "source": [ - "documents = [Document(page_content=text)]\n", - "llm_transformer = LLMGraphTransformer(llm=llm)\n", - "graph_documents = llm_transformer.convert_to_graph_documents(documents)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "92ab0dfa-fdc3-4fd9-926b-d01f8c113077", - "metadata": {}, - "outputs": [], - "source": [ - "llm_transformer_filtered = LLMGraphTransformer(\n", - " llm=llm,\n", - ")\n", - "graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(\n", - " documents\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "9ba1ab80-33f3-4f50-a14b-666efe065f5a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[GraphDocument(nodes=[Node(id='Marie Curie', type='Person', properties={}), Node(id='Pierre Curie', type='Person', properties={}), Node(id='University Of Paris', type='Organization', properties={})], relationships=[Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='Pierre Curie', type='Person', properties={}), type='SPOUSE', properties={}), Relationship(source=Node(id='Marie Curie', type='Person', properties={}), target=Node(id='University Of Paris', type='Organization', properties={}), type='PROFESSOR', properties={})], source=Document(metadata={}, page_content='\\nMarie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\\nShe was the first woman to win a Nobel Prize, the first 
person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\\nHer husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\\nShe was, in 1906, the first woman to become a professor at the University of Paris. \\n'))]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "graph_documents_filtered" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "ccb6deab-0bee-4ae9-bbdb-1c7440760ab7", - "metadata": {}, - "outputs": [], - "source": [ - "graph = NetworkxEntityGraph()\n", - "\n", - "# Add nodes to the graph\n", - "for node in graph_documents_filtered[0].nodes:\n", - " graph.add_node(node.id)\n", - "\n", - "# Add edges to the graph\n", - "for edge in graph_documents_filtered[0].relationships:\n", - " graph._graph.add_edge(\n", - " edge.source.id,\n", - " edge.target.id,\n", - " relation=edge.type,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "e61b726f-20df-4e6b-bbcf-cfbdd832036a", - "metadata": {}, - "outputs": [], - "source": [ - "chain = GraphQAChain.from_llm(\n", - " llm=llm, \n", - " graph=graph, \n", - " verbose=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "f33e03ac-bdbd-4bd5-901b-18fcaed84de9", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new GraphQAChain chain...\u001b[0m\n", - "Entities Extracted:\n", - "\u001b[32;1m\u001b[1;3mMarie Curie\u001b[0m\n", - "Full Context:\n", - "\u001b[32;1m\u001b[1;3mMarie Curie SPOUSE Pierre Curie\n", - "Marie Curie PROFESSOR University Of Paris\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "\"I don't know who Marie Curie is, but I can tell you some information 
about her. She was a professor at the University of Paris and was married to Pierre Curie.\"" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "question = \"\"\"Who is Marie Curie?\"\"\"\n", - "chain.run(question)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "65c70e9a-0975-4c33-9fe1-70fd5fc6459d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/CLI/code/GraphRAG/graphrag_prompt.json b/CLI/code/GraphRAG/graphrag_prompt.json deleted file mode 100644 index b67eaf1..0000000 --- a/CLI/code/GraphRAG/graphrag_prompt.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "catalyst/co_catalyst": "What is the chemical name for the catalyst and co_catalyst from the knowledge graph that about an solar chemical experiment?", - "light_source/lamp": "Which category of light source and lamp is used from the knowledge graph about a solar chemical experiment? The category is given below: \nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']", - "reaction_medium": "Which category of reaction_medium is used from the knowledge graph about a solar chemical experiment? The category is given below: \nReaction_medium: ['Liquid', 'Gas']", - "reactor_type": "Which category of reactor_type is used from the knowledge graph about a solar chemical experiment? 
The category is given below: \nReactor_type: ['Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed']", - "operation_mode": "Which category of operation_modeis used from the knowledge graph about a solar chemical experiment? The category is given below: \nOperation_mode: ['Batch', 'Continuous', 'Batch/Continuous']" -} \ No newline at end of file diff --git a/CLI/code/GraphRAG/run_benchmark.sh b/CLI/code/GraphRAG/run_benchmark.sh deleted file mode 100644 index 6230488..0000000 --- a/CLI/code/GraphRAG/run_benchmark.sh +++ /dev/null @@ -1,36 +0,0 @@ -# !/bin/bash - -use_platform='False' -llm_id='Qwen/Qwen2.5-32B-Instruct' -input_file_path='/home/jovyan/grob/test.pdf' -prompt_file='/home/jovyan/GraphRAG/graphrag_prompt.json' -# context_file_path='/home/jovyan/UPM/Clark/GraphRAG/result/Qwen2.5-32B-Instruct/' -context_file_path='/home/jovyan/GraphRAG/result/test/' -graph_file_path='/home/jovyan/GraphRAG/result/test/' - -# Define the target directory -directory="/home/jovyan/GraphRAG/paper_all" - -# Check if the target is not a directory -if [ ! -d "$directory" ]; then - exit 1 -fi - -# Loop through files in the target directory -for file in "$directory"/*; do - if [ -f "$file" ]; then - var=$(echo "$file" | cut -d "_" -f 3) - ind=$(echo "$var" | cut -d "." 
-f 1) - output_file="${context_file_path}result_${ind}.json" - graph_file="${graph_file_path}graph_${ind}.gml" - if [ -f "$output_file" ]; then - echo "file exist" - else - python graphrag.py --use_platform "$use_platform" --llm_id "$llm_id" --input_file_path "$file" --prompt_file "$prompt_file" --context_file_path "$output_file" --graph_file_path "$graph_file" - echo $output_file - # break - fi - # python graphrag.py --use_platform use_platform --llm_id llm_id --input_file_path "$file" - fi -echo "DONE" -done \ No newline at end of file diff --git a/CLI/code/__pycache__/process.cpython-310.pyc b/CLI/code/__pycache__/process.cpython-310.pyc deleted file mode 100644 index 46f437440408bf91bb39c6003a72a8fe46e94396..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3826 zcmbVPTaz0{74DuHjkJ>X;`OfaC81_hm_#_17APvo#yA-7E*O$cV!IL;ucMls?$wSn zni1Wjy$FdiwXTZ&kUu~OrG=+F^AF&K-!QK{`6u8i0^ey#-d$5XfTrej_vtfzIp;eU zrPI@8hToqq{d4~4s4Z)H+?8<>j7M#$ zbe-)94XnN;oC{18KDE?E z^xX^WV|IsSmMD&9zF=Qkhfw#&Q1~6@u*^PqhqHmrS#JhXrM`VE@k_`I?BCcVIqL24 zJyx6AxVv5}`jwVUqzW>5YgPO6xR*}jNMPLDk%5rPUp&bRqb$fG2>FXq62@I2n=06A z9<$G%WCz>ZQ5=mwn9XF=4AWGJD8cgDKci)f^wiWKGJy)8awz8M--wXzdc`%RQnljs zme%$|*`bX+(N(aA_rdDAH`Y6bD_+gX-O!lZuZ;Q4j@Zr%9f)W7)bj;uj$hPL?M9)@ z9UM>kw$h!P%deQ4#-`7f8aruQHXfwA2SL)flg5E++J;jCB>{qBX9KmOn9$ z8}TPh*aQBU!$8hs7|OVBq07}bg*~+U*3dbE^soTM^Uv64d>aleijPVI8+~pfTWR2k za^Dt}LoTYnx5RYcX)!VLM=oX$EyI~LoH=mLj5+gtN1PG!Eo;JQIfDXtXT`#ZH*ou| zYW535SDfv;w8{}eM!x{lFZNcwH7F;Lo|a(<$AVdvMEDEfiyG#QJ@`G`G7_oxi}&6I zXSXY3LPe^)QFyI;xecf8UE0aA&Pt=Px3^cv_-a^BRjZNc#&_$>3fY78s`W+#Gb>q? 
z#d716b_wh*UXs%vfwZ6{!5<@!Twq^7vwhGnoq+}YvOaFG%I+@=kRxCNduWkn2TtEn zm-~EZXXufh4>{fRS7$^4Qt0Ex?nckSDSoY9#vVBzn1iW#3~Fv(+>R0v#BuICNTVb# zMZh^BBzJ>OM<&96LR}=`LSD*(R+G|4UL2_?%Na*!oW^uj6?%EhqkFt?JiRd&XP-gTiqJ3=3=#5qBwLPvtiMAK?zBidc|5rge5 zN_REBVX}I4sM_pg1ZD>F1p~re3PSpIr zSz^48J~r3$_3!4qa{`2w+#%G#qk>jm0_OBylkIx^WBd#M z)pS=NqrV4Iwo2Twt0-*d`HWpHSI?k`;qJd2@SoFXRs~Yv&2h(4KgMULe3<-+{#lsv zCPt^Xkg*Ke^w1(dRYF0>2je1l(G)-SsLZ8UFL>tm?L$72 zb{UFZhl*aTVOmr27P=o|OYZ|yuLoW?@d6d@M7yaeClAIsZdYO^@Zty#MM=<0w;sqW z)!xtUyuV@6Ul@pBlsEtS=LA`>fasE{-UU;xo+wMbY)8`4_1^janCb)o@`|@nE1DuO zua1jSlUa=76e%h%#%ZesR47sz3Y<)#)SX6Y+oa-k^be#;pg>{PL18Wx`KqF_=Ec%y zdQ!HUy+cUob0C&gwqQfc2Y773Qmia5y92M8~QzJzoIbWo7Br_#t zeQgB7yXZqH#L6otc%-M;<) zZB?S#RU%X}kMc30OVKe;T-3nYAi8v|or-QOuX9M4*i~io0PN(Mv&%LBY?bCqQ`I;B E4Ymv$aR2}S diff --git a/CLI/code/cli.py b/CLI/code/cli.py deleted file mode 100644 index fe1cf8c..0000000 --- a/CLI/code/cli.py +++ /dev/null @@ -1,278 +0,0 @@ -# File : pipeline.py -# Time :2024/7/14 14:11 -# Author :ClarkWang -# Contact :wzyyyyyy0519@gmail -# Description: - -# !pip install --upgrade langchain langchain_groq -# !pip install --upgrade langchain-community -# !pip install --upgrade langchain-core -# !pip install --upgrade langsmith -# !pip install ctransformers[cuda] -# !pip install huggingface-hub -# !pip install --upgrade sqlalchemy -# !pip install rdflib -# !pip install llama-cpp-python -# !pip install typing-extensions==4.7.1 --upgrade -# !pip install pypdf2 -# !pip install sentence-transformers==2.7.0 -# !pip install --upgrade transformers -# !pip install faiss-cpu -# !pip install bitsandbytes accelerate - -import subprocess -from multiprocessing import Pool, Process -import time - - -background_process = Process(target=subprocess.Popen, args=(["bash", "run_grobid.sh"],)) -background_process.start() -print(background_process.is_alive()) -time.sleep(15) - -# client = GrobidClient(config_path="./setting/config.json") - - - -from grobid_client.grobid_client import GrobidClient -import os 
-import json -import argparse - -import torch -import transformers -from huggingface_hub import login -from langchain_groq import ChatGroq -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import FAISS -from langchain_community.embeddings import HuggingFaceEmbeddings -from langchain.prompts import PromptTemplate -from langchain.chains import RetrievalQA -from process import * - - - -# client = GrobidClient(config_path="./setting/config.json") - -def get_context(context): - res = [] - for item in context: - res.append(item.page_content) - return res - -def clean_gen(gen): - res = {} - for line in gen.split("\n"): - if ":" in line: - try: - s, e = line.split(":") - res[s.strip()] = e - except: - pass - return res - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -class SolarQA: - def __init__(self, use_platform, user_key, llm_id, hf_key, llm_platform="LOCAL", temperature=0.1, sim_model_id="Salesforce/SFR-Embedding-Mistral", input_file_path=str(), context_file_path=""): - # print(input_file_path) - self.use_platform = use_platform - self.llm_id = llm_id - self.user_key = user_key - self.hf_key = hf_key - self.llm_platform = llm_platform.lower() - self.temperature = temperature - self.sim_model_id = sim_model_id - self.sys_prompt = """ - You are an assistant for extract information from context and selection the possible answer from the selection provided. - You are given the extracted parts of a paper about solar chemistry and a question. Provide the extracted information and nothing else. 
- """ - self.context_file_path = context_file_path - # self.data = data - self.input_file_path = input_file_path - - self.context_result = { - "generation_model": self.llm_id, - "similarity_model": self.sim_model_id, - "similarity_metric": "Cosine_Similarity", - "result": [] - } - login(self.hf_key) - self.get_text() - self.get_vector() - print("¡¡¡Vector Store Database is prepared!!!") - self.get_llm() - - def get_text(self): - title_list = ["Abstract", "Experimental", "Results and discussion"] - # with open(self.json_path, "rb") as f: - # data = json.load(f) - # print(self.input_file_path) - if self.input_file_path[-3:] == "pdf": - data = process_paper(self.input_file_path) - else: - with open(self.input_file_path, "rb") as f: - data = json.load(f) - # print("¡¡¡PDF file has been extracted!!!") - self.context = "" - for section in data: - if section["title"] in title_list: - self.context += section["title"] - self.context += "\n" - self.context += section["content"] - self.context += "\n" - - def get_llm(self): - if self.use_platform: - if self.llm_platform == "groq": - os.environ["GROQ_API_KEY"] = self.user_key - self.llm = ChatGroq(temperature=self.temperature, model_name=self.llm_id) - else: - raise ValueError('Unsupportted Platform') - else: - try: - bnb_config = transformers.BitsAndBytesConfig( - load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16 - ) - self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_id) - self.llm = transformers.AutoModelForCausalLM.from_pretrained( - self.llm_id, - torch_dtype=torch.bfloat16, - device_map="auto", - quantization_config=bnb_config - ) - self.terminators = [ - self.tokenizer.eos_token_id, - self.tokenizer.convert_tokens_to_ids("<|eot_id|>") - ] - except: - raise ValueError('Unsupportted Platform') - - def get_vector(self): - model_kwargs = {"device": "cpu"} - self.sim_model = HuggingFaceEmbeddings(model_name=self.sim_model_id, 
model_kwargs=model_kwargs) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=512, - chunk_overlap=32, - length_function=len - ) - chunks = text_splitter.split_text(text=self.context) - self.vector_store = FAISS.from_texts(chunks, embedding=self.sim_model, normalize_L2=True, distance_strategy="COSINE") - - def search(self, query, k): - embed_q = self.sim_model.embed_query(query) - self.context = self.vector_store.similarity_search_with_score_by_vector(embed_q, k) - - def format_prompt(self, query, k): - self.search(query, k) - prompt = self.sys_prompt + "\n" + "Question:" - prompt += query - prompt += "\n" - prompt += "Context:" - for i in range(k): - prompt += f"{self.context[i]}\n" - return prompt - - def generation(self, query_data): - res = "" - if self.use_platform: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - outputs = self.llm.invoke(messages) - response = outputs.content - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - else: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - input_ids = self.tokenizer.apply_chat_template( - messages, - add_generation_prompt=True, - return_tensors="pt" - ) - outputs = self.llm.generate( - input_ids, - max_new_tokens=1024, - eos_token_id=self.terminators, - do_sample=True, - temperature=self.temperature, - top_p=0.9, - ) - response = 
self.tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True) - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - - def save_context(self): - # print(self.context_result) - with open(self.context_file_path, "w") as f: - json.dump(self.context_result, f) - print(f"RAG context is saved at: {self.context_file_path}") - - - -def get_parser(): - parser = argparse.ArgumentParser(description="Demo of LLM Pipeline") - parser.add_argument('--use_platform', type=lambda x:str2bool(x), default=True, help="the parameter of whether use online llm platform or use local model") - parser.add_argument('--user_key', default="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", help="if use platform, enter your key for platform", type=str) - parser.add_argument('--llm_id', default="llama-3.1-70b-versatile", help="the reference for the selected model, support grog model, huggingface llm or local model path ", type=str) - parser.add_argument('--hf_key', default="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", help="your huggingface token", type=str) - parser.add_argument('--llm_platform', default="groq", help='your platform choice', choices=["groq"], type=str) - parser.add_argument('--sim_model_id', default='Salesforce/SFR-Embedding-Mistral', help="encoder model for RAG", type=str) - parser.add_argument('--input_file_path', help='input data, extracted context from pdf', type=str) - parser.add_argument('--prompt_file_pdf', help='queries', type=str) - parser.add_argument('--context_file_path', help='save context file', type=str) - return parser - -def main(): - parser = get_parser() - 
args = parser.parse_args() - args_dict = vars(args) - prompt_file_pdf = args_dict["prompt_file_pdf"] - del args_dict["prompt_file_pdf"] - start_time = time.time() - solar = SolarQA(**args_dict) - print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) - temp_time = time.time() - with open(prompt_file_pdf, "rb") as f: - query_data = json.load(f) - solar.generation(query_data=query_data) - print(solar.result) - print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) - solar.save_context() - - -print("start") -main_process = Process(target=main) -main_process.start() -if main_process.is_alive(): - print("main process is finished") -else: - background_process.kill() diff --git a/CLI/code/data/context.json b/CLI/code/data/context.json deleted file mode 100644 index 8ed839d..0000000 --- a/CLI/code/data/context.json +++ /dev/null @@ -1,154 +0,0 @@ -{ - "generation_model": "meta-llama/Llama-3.2-3B-Instruct", - "similarity_model": "Salesforce/SFR-Embedding-Mistral", - "similarity_metric": "Cosine_Similarity", - "result": [ - { - "question_category": "catalyst/co_catalyst", - "query": "\nPlease find the name of the catalyst and co-catalyst name from the provided context which describe an solar chemical experiment.\nPlease only generate the name of the catalyst and nothing else. Such as TiO2. ZrO2 or Ag, etc.\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nIf there are multiple results, please indicate them as XXX-XXX.\ncatalyst: XXX\nco_catalyst: XXX\n", - "generation": { - "catalyst": " TiO2", - "co_catalyst": " Cu" - }, - "evidence": [ - { - "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", - "similarity_score": 0.4707722067832947 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.49091827869415283 - }, - { - "pdf_reference": "2 catalyst.The highest methanol yield as well as quantum efficiency was achieved by 3CuO/TiO 2 indicating that this catalyst had the highest reactivity among all the prepared catalysts.Besides describing the catalyst reactivity, quantum efficiency value also figured the effectiveness of the reaction system.The effect of copper loading on methanol yields is shown in Fig. 4. 
The methanol yields increased with Cu loading, but then decreased when the Cu loading exceeded 3 wt.Evidently, more Cu loading can", - "similarity_score": 0.5129979848861694 - }, - { - "pdf_reference": "for Degussa P25 and 3CuO/TiO 2 catalysts.A mean value for E a of ca.26 and 12 kJ/mol was calculated for Degussa P25 and 3CuO/TiO 2 , respectively.These positive values of E a imply that the desorption of products is the rate limiting step in the photosynthetic formation of methanol.In addition, the apparent lower activation energy of 3CuO/TiO 2 catalyst may indicate a catalytic role of copper species as an active site to provide methanol, and hence improving the photoefficiency of TiO 2 photocatalyst.The", - "similarity_score": 0.5173196792602539 - }, - { - "pdf_reference": "Photocatalytic reduction of CO 2 by copper-doped titania catalysts has been investigated.The photocatalysts with various copper species (Cu 0 , Cu I , Cu II ) were prepared by an improved-impregnation method, where copper nitrate is doped into TiO 2 Degussa-P25.It is likely that copper present on the catalyst surface and the grain size of copper-titania catalysts is uniform, with crystallite size approximately 23 nm.The dispersion capacity of CuO in the vacant sites of TiO 2 is about 4.16 Cu 2 nm A2 (2.2", - "similarity_score": 0.5403363704681396 - } - ] - }, - { - "question_category": "light_source/lamp", - "query": "\nPlease find the category of Light_source and Lamp from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Light_source is Solar or Solar Simulator, Lamp is always Solar Simulator.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']\nPlease generating restrictively follow 
the format, and must start the generation as the format. Do not generate anything else.\nLight_source: XXX\nLamp: XXX\n", - "generation": { - "Light_source": " Solar Simulator", - "Lamp": " Solar Simulator" - }, - "evidence": [ - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. Blank experiments were conducted to ensure that the product", - "similarity_score": 0.47584426403045654 - }, - { - "pdf_reference": "ranging from 0.4 to 1.0 lm.The EDX/Mapping analysis of Cu on the catalysts demonstrated that Cu uniformly dispersed on TiO 2 surface, and further increase of copper leads to the increase of the dot intensities of Cu.In addition, Table 1 lists the elemental wt of Cu estimated from EDX and AAS for comparison.The difference between wt of Cu obtained by EDX on different voltage (15 and 22 kV) and AAS indicated that most Cu was on the surface of the TiO 2 . 
The DRS patterns of TiO 2 P25 and all CuO-loaded", - "similarity_score": 0.6357707977294922 - }, - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.63763827085495 - }, - { - "pdf_reference": "was used to record the diffuse reflectance spectra (DRS) of the samples.The reflectance spectra of the samples were analyzed under ambient condition in the wavelength range of 340-700 nm.Copper concentration in the catalysts was measured by atomic absorption spectroscopy (AAS) using Chem.Tech Analytical 2000 spectrophotometer.The specific surface area of catalysts was measured by multipoint BET of N 2 adsorption in a Quantachrome Autosorb-6.A SEM 515 PHILIPS scanning electron microscope (SEM) equipped with", - "similarity_score": 0.6471814513206482 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. 
The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.6493161916732788 - } - ] - }, - { - "question_category": "reaction_medium", - "query": "\nPlease find the category of Reaction_medium from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Ph Value is mentioned in the experiment, Reaction_medium is always Liquid.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReaction_medium: ['Liquid', 'Gas']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReaction_medium: XXX\n", - "generation": { - "Reaction_medium": " Liquid" - }, - "evidence": [ - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. 
Blank experiments were conducted to ensure that the product", - "similarity_score": 0.6080079674720764 - }, - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.6227010488510132 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.6527293920516968 - }, - { - "pdf_reference": "agitated the catalyst-suspended solution at the bottom to prevent sedimentation of the catalyst.The liquid sample was withdrawn every hour from a reactor column.The catalyst-suspended samples were centrifuged to separate catalyst particles from the liquid samples and then analyzed by a GC-FID of Shimadzu 8-ATC equipped with Porapak-Q column.", - "similarity_score": 0.6608579754829407 - }, - { - "pdf_reference": "the system.As a result of these sequential reactions (Eq.( 5)), the electron-hole recombination rate could be effectively reduced. 
To study the temperature dependence of the CO 2 photoreduction, the experiment was repeated at different temperatures in the range of 43-100 C.As can be seen from Fig. 5 the methanol yield increased for higher reaction temperatures.Saladin et al. 10 reported that the kinetic of CO 2 photoreduction can be modeled with the adsorption/desorption equilibrium of the various species", - "similarity_score": 0.6727200150489807 - } - ] - }, - { - "question_category": "reactor_type", - "query": "\nPlease find the category of Reactor_type from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReactor_type: ['Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReactor_type: XXX\n", - "generation": { - "Reactor_type": " Fixed-bed" - }, - "evidence": [ - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. 
Blank experiments were conducted to ensure that the product", - "similarity_score": 0.5136107802391052 - }, - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.6327827572822571 - }, - { - "pdf_reference": "microscope (SEM) equipped with EDAX PV9900 PHILIPS energy dispersive X-ray (EDX) and Mapping was conducted at 15 and 22 kV to observe the morphology of catalysts, to perform semi-quantitative analysis, and to analyze distribution of Cu and Ti, respectively. The photocatalytic reactor configuration consists of a horizontal stainless steel vessel with an inner diameter and height of the vessel 140 and 50 mm, respectively.A top plate cover of the vessel was made of Pyrex and equipped with a gas bubbler and", - "similarity_score": 0.6621394157409668 - }, - { - "pdf_reference": "the system.As a result of these sequential reactions (Eq.( 5)), the electron-hole recombination rate could be effectively reduced. To study the temperature dependence of the CO 2 photoreduction, the experiment was repeated at different temperatures in the range of 43-100 C.As can be seen from Fig. 5 the methanol yield increased for higher reaction temperatures.Saladin et al. 
10 reported that the kinetic of CO 2 photoreduction can be modeled with the adsorption/desorption equilibrium of the various species", - "similarity_score": 0.6621772050857544 - }, - { - "pdf_reference": "agitated the catalyst-suspended solution at the bottom to prevent sedimentation of the catalyst.The liquid sample was withdrawn every hour from a reactor column.The catalyst-suspended samples were centrifuged to separate catalyst particles from the liquid samples and then analyzed by a GC-FID of Shimadzu 8-ATC equipped with Porapak-Q column.", - "similarity_score": 0.667502760887146 - } - ] - }, - { - "question_category": "operation_mode", - "query": "\nPlease find the category of Operation_mode from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nOperation_mode: ['Batch', 'Continuous', 'Batch/Continuous']\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nOperation_mode: XXX\n", - "generation": { - "Operation_mode": " Batch/Continuous" - }, - "evidence": [ - { - "pdf_reference": "to ensure that the product formed was due to the photoreduction of CO 2 .The blank tests consists of a UV-illuminated without the catalyst and a reaction in the dark with the catalyst.For typical batches, 0.3 g of catalyst powder was suspended in 300 ml of 1 M KHCO 3 solution.Ultra high purity (UHP) grade CO 2 is bubbled through the reactor for at least 30 min to purge air and to saturate the solution.The reactor is tightly closed during the reaction, and a magnetic stirrer agitated the catalyst-suspended", - "similarity_score": 0.6429235935211182 - }, - { - "pdf_reference": "equipped with a gas bubbler and sampling port.The reactor vessel was irradiated, from top of the reactor, by 6 Toki tubular black light UV lamps ( 10 W), which had total intensity of 2450 lW/cm 2 in the surface of the catalyst slurry.The reactor was enclosed by an aluminum foil reflector to optimize UV irradiation and to prevent interference from outside light.The reaction temperature was controlled with hot plate magnetic stirrer and fan cooler. Blank experiments were conducted to ensure that the product", - "similarity_score": 0.6567009687423706 - }, - { - "pdf_reference": "of TiO 2 photocatalyst.The in situ IR experiments are still in progress to investigate the mechanism aspects of the catalyst.", - "similarity_score": 0.6839278340339661 - }, - { - "pdf_reference": "other hydrocarbons might have been generated, but in small quantities which is too low to be detected.Photocatalytic activity is presented by a product yield, e.g., lmol/(g catal.), and quantum efficiency (U Q ) that can be evaluated with Eq. ( 1) below 1.This calculation is based on methanol yield at 6 h of the reaction. 
The results of quantum efficiency calculation are displayed in Table 2.The formation of methanol was found to be much more effective on Cu 2 loaded TiO 2 catalyst.The highest methanol", - "similarity_score": 0.6891406774520874 - }, - { - "pdf_reference": "was used to record the diffuse reflectance spectra (DRS) of the samples.The reflectance spectra of the samples were analyzed under ambient condition in the wavelength range of 340-700 nm.Copper concentration in the catalysts was measured by atomic absorption spectroscopy (AAS) using Chem.Tech Analytical 2000 spectrophotometer.The specific surface area of catalysts was measured by multipoint BET of N 2 adsorption in a Quantachrome Autosorb-6.A SEM 515 PHILIPS scanning electron microscope (SEM) equipped with", - "similarity_score": 0.6904171109199524 - } - ] - } - ] -} \ No newline at end of file diff --git a/CLI/code/data/paper1_extraction.json b/CLI/code/data/paper1_extraction.json deleted file mode 100644 index ccaf761..0000000 --- a/CLI/code/data/paper1_extraction.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "title": "Abstract", - "content": "Pure TiO 2 and various silver-enriched TiO 2 powders were prepared by the sol-gel process controlled in the reverse micellar environment.The catalysts were tested in CO 2 photocatalytic reduction and characterized by X-ray diffraction (XRD), nitrogen adsorption measurement and UV-vis.Methane and methanol were the main reduction products.The yield of methane and methanol increases when modifying the TiO 2 by silver incorporation is caused by two mechanisms: up to 5% of Ag in TiO 2 the Ag impurity band inside the TiO 2 bandgap decreases the absorption edge and increases so the electron-hole pair generation, above 5% of Ag in TiO 2 Ag metallic clusters are formed in TiO 2 crystals with Shottky barrier at the metal-semiconductor interface, which spatially separates electron and holes and increases their lifetime (decreases probability of their recombination)." 
- }, - { - "title": "Experimental", - "content": "Pure TiO 2 and various silver-enriched TiO 2 powders were prepared by the sol-gel process controlled in the reverse micellar environment.Pure TiO 2 was synthesized by the addition of Tita- The surface area and porous structure of prepared catalysts were evaluated from nitrogen adsorption isotherms by the BET and BJH methods, respectively.Nitrogen physical adsorption was performed on a volumetric apparatus ASAP2020 Micromeritics (USA).Before analyses the samples were dried at 105 \u2022 C for 24 h in vacuum (0.1 Pa). UV-vis diffuse reflectance spectra of samples were recorded using GBS CINTRA 303 spectrometer equipped with a diffuse reflectance attachment with a spectralon-coated integrating sphere against spectralon reference.The reflectances were recalculated to the absorption using the Schuster-Kubelka-Munk equation, where R \u221e is the diffuse reflectance from a semi-infinite layer.Absorption edge was estimated by extrapolating the decreasing portion of the spectrum to the abscissa at zero absorption.Ag-to-TiO 2 mass ratio in Ag/TiO 2 powders were determined by X-ray fluorescence (XRF) analysis using an ARL 9400 XP sequential WD-XRF spectrometer with the standard deviation lower than 5%. The purity (carbon content in weight %) of the powders was specified on an Elementar Vario EL III (Elementar).The detection limit of the apparatus was 0.1 wt.%. XRD powder diffraction patterns were obtained with the aid of a Seifert-FMP or a Panalytical-MPD laboratory diffractometer with Cu K\u2423 radiation in the conventional focusing Bragg-Brentano geometry in the diffraction angle range 2 = 10-90 \u2022 . The photocatalytic reduction of carbon dioxide was carried out in a homemade apparatus (Fig. 1). 
The photocatalytic reduction of carbon dioxide was carried out in a stirred batch annular reactor with a suspended catalyst illuminated by UV 8 W Hg lamp (254 nm).GC/FID/TCD was used for the analysis of gas and liquid reaction products.The details of the photocatalytic CO 2 reduction experiment and analytical methods were described in our previous publication [39].It is important to minimize the influence of transport phenomena during kinetic measurements.The elimination of CO 2 diffusion from the bulk of gas through the gas-liquid interface in a laboratory batch slurry reactor was accomplished by saturating the liquid with pure CO 2 before the reaction had been started [4,11].Catalyst loading of 1 g dm -3 was chosen to avoid concentration gradients in the bulk of stirred liquid with TiO 2 suspension due to the scattering effect of light caused by the high TiO 2 concentration [11,16,40,41].The determination of suitable volume of the liquid phase in our annular photoreactor to fulfil the requirement of perfect mixing has been published recently as 100 ml [42]. The accuracy of measurements was verified by series of repeated measurements.Average values were mentioned in this article.Maximal relative error was 5%." - }, - { - "title": "Results and discussion", - "content": "The basic textural properties of prepared titania and doped titania samples are summarised in Table 1 together with real Ag content determined by XRF.All samples possess a relatively high surface area; moreover, the positive effect of silver, which increases the surface area by about 15%, is clearly noticeable.It is obvious that the surface areas of Ag-doped TiO 2 samples varied only slightly and they are independent on the growth of Ag amount.The pore maxima of all samples are nearly the same without dependence on the Ag presence. The purity of all prepared samples was excellent, because the organic residual content was lower than 0.1 wt.%. 
XRD analysis confirmed the presence of the pure anatase crystallite structure for all samples.Ag was not detected in XRD pattern, probably Ag is situated in the bulk (inside the TiO 2 crystals) and/or Ag clusters are smaller than 0.3 nm.Fig. 2 shows the UV-vis spectra of Ag-TiO 2 catalysts.It is seen that (i) the maxima of the UV-vis band increased with the increasing Ag loading from 360 to 367 nm and (ii) the maximum absorption wavelength increased with increasing Ag loading, i.e. the absorption was shifted into the visible region for Ag-TiO 2 catalysts and the shift increased with the increasing amount of silver.The absorption edge (Fig. 2) values are given in Table 1. The effect of irradiation time on the formation of CO 2 photocatalytic reduction products was investigated over a period of 0-24 h on TiO 2 catalysts with different amount of Ag.Fig. 3 shows the evolution of all reaction products as the functions of the irradiation for a 7% Ag-TiO 2 catalyst.Two main products were determined: methane in the gas phase and methanol in the liquid phase.Hydrogen and low amounts of carbon monoxide were also detected.Other products such as formic acid, formaldehyde, ethane and ethylene could also be formed [2,16], but they were undetectable.The observed order of yields (mol/g cat ) order was: We assume on the basic of observed products the mechanism where the formation of carbon monoxide from \u2022 CO 2 -radical is the first step.Then both methane and methanol are formed in the parallel reactions.This mechanism was first suggested by Anpo [3]. Several authors [2,5,17] proposed the same mechanism with the exception that they predicted the formation of a carbon radical instead of surficial carbon. Comparisons of CH 4 , CH 3 OH, H 2 and CO yields over TiO 2 with different Ag content are shown in Figs.4-6A and B respectively.The yields of methane were negligible during the first 8 h of irradiation in all cases with the exception of catalyst 1 and 5% Ag-TiO 2 (Fig. 
4).A similar trend of yield-time dependency was observed; only the shape for the sample with 7% Ag-TiO 2 was slightly different.A substantial increase of methane yield was observed after 8 h of irradiation.The highest yield of methane was observed for the 7% Ag-TiO 2 catalyst.The yields of methanol were one order of magnitude lower than the yields of methane (Fig. 5).Data at 5 h were measured but were under the limit of determinableness (12 g/l i.e. 0.38 mol/gcatalyst).The steepest curve of methanol yield was measured for the 7% Ag-TiO 2 catalyst. The hydrogen yields were small during the first 13 h of irradiation in all cases (Fig. 6A).After 15 h the hydrogen yields started increasing slowly.The yields of carbon monoxide increased slowly during the reaction but all yields were near the limit of detection (Fig. 6B). The data from the longest reaction time span were chosen for the comparison because the yields of all products were the highest and the most accurate.The effect of silver doping on the yields of the two main products (methane and methanol) related to the weight of the catalyst is depicted in Fig. 7.The photoreactivity of Ag-TiO 2 increases with the increase in the Ag dopant concentration.Both yields are the highest for 7% Ag-TiO 2 .The effect of the Ag dopant on the CO 2 photocatalytic reduction has not been studied yet.However, an optimum amount of Ag corresponding to the highest product yields was found for other photocatalytic reactions [27,28,34,36]. The yields of the two main products (methane and methanol) expressed per 1 m 2 of catalyst surface (Fig. 8) show similar trends with the highest yields obtained for 7% Ag-TiO 2 .This indicates that the specific surface area is not the most decisive parameter in the CO 2 photocatalytic reduction. 
Doping with metals or metal oxides could cause two effects: (1) Firstly, it creates impurity band inside the TiO 2 bandgap, thus shifting the absorption edge to the visible region [29].The decrease of the absorption edge and the shift of absorption wavelength into the visible region in this paper are clearly visible from Table 1 and Fig. 2, for Ag content up to 5%.The decrease of the absorption edge increases the electron and hole pair photogeneration, which causes yields increase of CO 2 photoreduction.(2) Secondly, for Ag content above 5% silver atoms are not anymore randomly located in TiO 2 crystal, but forms probably metallic clusters inside TiO 2 crystal.These metal clusters could provoke a decrease in the electron-hole recombination rate [43,44].This hypothesis was based on the fact that the Fermi level of TiO 2 is higher than that of silver metals [45].Such phenomenon would be expected to lead to the formation of a Schottky barrier at metal-semiconductor contact region, which causes the charge separation, the decrease of electron-hole recombination rate and consequently increase of the TiO 2 photocatalytic activity.The above mentioned fact could be the reason for observed yields increases with increased Ag content in our experiments with CO 2 photocatalytic reduction and is the main reason of yields increase with 7% Ag-TiO 2 since the absorption edge has same value as 5% Ag-TiO 2 . Our results are in agreement with other works dealing with photocatalytic activity of Ag-doped TiO 2 .For example, the presence of Ag in TiO 2 increased the yield of dye degradation [27,28,31,32], photooxidation of 2-propanol [29], degradation Fig. 8. Dependence of the product yields related to specific surface area (after 24 h of irradiation) on the different catalysts. of 1,4-dichlorobenzene [33] and degradation of oxalic acid [37]." 
- } -] \ No newline at end of file diff --git a/CLI/code/data/prompts.json b/CLI/code/data/prompts.json deleted file mode 100644 index b6dc5eb..0000000 --- a/CLI/code/data/prompts.json +++ /dev/null @@ -1 +0,0 @@ -{"catalyst/co_catalyst": "\nPlease find the name of the catalyst and co-catalyst name from the provided context which describe an solar chemical experiment.\nPlease only generate the name of the catalyst and nothing else. Such as TiO2. ZrO2 or Ag, etc.\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nIf there are multiple results, please indicate them as XXX-XXX.\ncatalyst: XXX\nco_catalyst: XXX\n", "light_source/lamp": "\nPlease find the category of Light_source and Lamp from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Light_source is Solar or Solar Simulator, Lamp is always Solar Simulator.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nLight_source: ['UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator']\nLamp: ['Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nLight_source: XXX\nLamp: XXX\n", "reaction_medium": "\nPlease find the category of Reaction_medium from the provided context which describe an solar chemical experiment.\nThe generation condition of the extraction is given: If Ph Value is mentioned in the experiment, Reaction_medium is always Liquid.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReaction_medium: ['Liquid', 'Gas']\nPlease generating restrictively follow the format, and must start the generation as the format. 
Do not generate anything else.\nReaction_medium: XXX\n", "reactor_type": "\nPlease find the category of Reactor_type from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nReactor_type: ['Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nReactor_type: XXX\n", "operation_mode": "\nPlease find the category of Operation_mode from the provided context which describe an solar chemical experiment.\nPlease only select the generation from the provided possible choices.\nPossible Choices:\nOperation_mode: ['Batch', 'Continuous', 'Batch/Continuous']\nPlease generating restrictively follow the format, and must start the generation as the format. Do not generate anything else.\nOperation_mode: XXX\n"} \ No newline at end of file diff --git a/CLI/code/process.py b/CLI/code/process.py deleted file mode 100644 index d258961..0000000 --- a/CLI/code/process.py +++ /dev/null @@ -1,130 +0,0 @@ -from grobid_client.grobid_client import GrobidClient -import xml.etree.ElementTree as ET -import json -import pandas as pd -import re -import unicodedata -import logging - -# Configurar el registro (logging) -logging.basicConfig(level=logging.ERROR, format='%(asctime)s:%(levelname)s:%(message)s') - -# GROBID client configuration -client = GrobidClient(config_path="./settings/config.json") -service_name = "processFulltextDocument" - -def process_fulltext_document(service, file): - try: - rsp = client.process_pdf(service, file, - generateIDs=True, - consolidate_header=True, - consolidate_citations=False, - include_raw_citations=False, - include_raw_affiliations=False, - tei_coordinates=True, - segment_sentences=True) - if rsp[1] == 200: # HTTP status check - return rsp[2] # rsp[2] is the XML string - else: - print(f"Error: 
{rsp[1]}") - except Exception as e: - print(f"Exception during PDF processing: {e}") - return None - -def clean_text(text): - """ Limpia y normaliza el texto para eliminar caracteres no deseados. """ - text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii') # Normalización Unicode - text = re.sub(r'[^\x00-\x7F]+', ' ', text) # Eliminar todos los caracteres no ASCII - text = re.sub(r'[\u00b0\n\t\r]', ' ', text) # Eliminar caracteres específicos - text = re.sub(r'[^A-Za-z0-9\s,.?!;:()\-\'\"/]', '', text) # Mantener solo caracteres alfanuméricos y puntuación básica - text = re.sub(r'\s+', ' ', text).strip() # Reemplazar múltiples espacios por uno solo - return text - -def extract_sections_from_xml(xml_content): - root = ET.fromstring(xml_content) - ns = {'tei': 'http://www.tei-c.org/ns/1.0'} - - # Extraer título - title = '' - title_element = root.find(".//tei:title", ns) - if title_element is not None: - title = ''.join(title_element.itertext()).strip() - - # Función para extraer contenido bajo una etiqueta específica - def extract_content_by_tag(tag_name): - content = [] - path = f".//{{{ns['tei']}}}{tag_name}" - for elem in root.findall(path): - text = ''.join(elem.itertext()) - if text: - content.append(clean_text(text.strip())) - return " ".join(content) - - # Función para extraer contenido según palabras clave - def extract_content_by_keywords(start_keywords, end_section): - content = [] - capture = False - for elem in root.iter(): - if elem.tag.endswith("head"): - if elem.text and any(keyword.lower() in elem.text.lower() for keyword in start_keywords): - capture = True - elif elem.text and end_section.lower() in elem.text.lower(): - break - if capture and elem.tag.endswith("p"): - text = ''.join(elem.itertext()).strip() - if text: - content.append(clean_text(text)) - return " ".join(content) - - # Nueva función para extraer el DOI - def extract_doi(): - """Extrae el DOI del documento usando su etiqueta específica.""" - doi_element = 
root.find(".//tei:idno[@type='DOI']", ns) - if doi_element is not None: - return clean_text(doi_element.text.strip()) - return None - - # Extraer el DOI - doi = extract_doi() - - # Construcción de las secciones - sections = [ - {"title": "Doi", "content": doi if doi else "Doi not found"}, # Incluir el DOI como primera sección - {"title": "Article_Title", "content": title}, # Agregar el título como primera sección - {"title": "Abstract", "content": extract_content_by_tag("abstract")}, - {"title": "Experimental", "content": extract_content_by_keywords( - ["Experimental", "Experimental studies", "Experiments", "Experimental methods", "Methods"], "Results and discussion")}, - {"title": "Results_and_discussion", "content": extract_content_by_keywords( - ["Results and discussion", "Result and discussion", "Results"], "Conclusion")}, - {"title": "Conclusions", "content": extract_content_by_keywords( - ["Conclusion", "Conclusions"], "Conclusion")}, - ] - - # Sección opcional "Supporting Information" - supporting_content = extract_content_by_keywords( - ["Supporting Information", "Supporting"], "Conclusion") - if supporting_content: - sections.append({"title": "Supporting_Information", "content": supporting_content}) - - return sections - - -def process_paper(pdf_file_path): - - try: - # Procesar el PDF con Grobid para obtener el XML - xml_response = process_fulltext_document(service_name, pdf_file_path) - - if xml_response: - # Extraer secciones relevantes del XML - sections = extract_sections_from_xml(xml_response) - return sections # Devolver el resultado como una lista de objetos JSON - else: - return [{"error": "Error processing the PDF."}] - except Exception as e: - logging.error(f"Error processing paper: {e}") - return [{"error": str(e)}] - - - - diff --git a/CLI/code/prompt.json b/CLI/code/prompt.json deleted file mode 100644 index 219922f..0000000 --- a/CLI/code/prompt.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "catalyst": "What is the chemical name of the catalyst 
used in the experiment?", - "co_catalyst": "What is the chemical name of the co_catalyst used in the experiment?", - "Light_source": "From the choices provided, which light source is used in the experiment?", - "Lamp": "From the choices provided, which lamp is used in the experiment?", - "Reaction_medium": "From the choices provided, which reaction medium is used in the experiment?", - "Reactor_type": "From the choices provided, which reactor type is used in the experiment?", - "Operation_mode": "From the choices provided, which oepration mode is used in the experiment?" -} \ No newline at end of file diff --git a/CLI/code/run_benchmark.py b/CLI/code/run_benchmark.py deleted file mode 100644 index 375a6c8..0000000 --- a/CLI/code/run_benchmark.py +++ /dev/null @@ -1,247 +0,0 @@ -import os -import time -from grobid_client.grobid_client import GrobidClient -import json -import argparse - -import torch -import transformers -from huggingface_hub import login -from langchain_groq import ChatGroq -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import FAISS -from langchain_community.embeddings import HuggingFaceEmbeddings -from langchain.prompts import PromptTemplate -from langchain.chains import RetrievalQA -from process import * - - -def get_context(context): - res = [] - for item in context: - res.append(item.page_content) - return res - -def clean_gen(gen): - res = {} - for line in gen.split("\n"): - if ":" in line: - try: - s, e = line.split(":") - res[s.strip()] = e - except: - pass - return res - -def str2bool(v): - return v.lower() in ("yes", "true", "t", "1") - -class SolarQA: - def __init__(self, use_platform, user_key, llm_id, hf_key, llm_platform="LOCAL", temperature=0.1, sim_model_id="Salesforce/SFR-Embedding-Mistral", input_file_path=str(), context_file_path=""): - # print(input_file_path) - self.use_platform = use_platform - self.llm_id = llm_id - self.user_key = user_key - self.hf_key = hf_key - 
self.llm_platform = llm_platform.lower() - self.temperature = temperature - self.sim_model_id = sim_model_id - self.sys_prompt = """ - You are an assistant for extract information from context and selection the possible answer from the selection provided. - You are given the extracted parts of a paper about solar chemistry and a question. Provide the extracted information and nothing else. - """ - self.context_file_path = context_file_path - # self.data = data - self.input_file_path = input_file_path - - self.context_result = { - "generation_model": self.llm_id, - "similarity_model": self.sim_model_id, - "similarity_metric": "Cosine_Similarity", - "result": [] - } - login(self.hf_key) - self.get_text() - self.get_vector() - print("¡¡¡Vector Store Database is prepared!!!") - self.get_llm() - - def get_text(self): - title_list = ["Abstract", "Experimental", "Results and discussion"] - # with open(self.json_path, "rb") as f: - # data = json.load(f) - # print(self.input_file_path) - if self.input_file_path[-3:] == "pdf": - data = process_paper(self.input_file_path) - else: - with open(self.input_file_path, "rb") as f: - data = json.load(f) - # print("¡¡¡PDF file has been extracted!!!") - self.context = "" - for section in data: - if section["title"] in title_list: - self.context += section["title"] - self.context += "\n" - self.context += section["content"] - self.context += "\n" - - def get_llm(self): - if self.use_platform: - if self.llm_platform == "groq": - os.environ["GROQ_API_KEY"] = self.user_key - self.llm = ChatGroq(temperature=self.temperature, model_name=self.llm_id) - else: - raise ValueError('Unsupportted Platform') - else: - try: - bnb_config = transformers.BitsAndBytesConfig( - load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16 - ) - self.tokenizer = transformers.AutoTokenizer.from_pretrained(self.llm_id) - self.llm = transformers.AutoModelForCausalLM.from_pretrained( - self.llm_id, - 
torch_dtype=torch.bfloat16, - device_map="auto", - quantization_config=bnb_config - ) - self.terminators = [ - self.tokenizer.eos_token_id, - self.tokenizer.convert_tokens_to_ids("<|eot_id|>") - ] - except: - raise ValueError('Unsupportted Platform') - - def get_vector(self): - model_kwargs = {"device": "cpu"} - self.sim_model = HuggingFaceEmbeddings(model_name=self.sim_model_id, model_kwargs=model_kwargs) - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=512, - chunk_overlap=32, - length_function=len - ) - chunks = text_splitter.split_text(text=self.context) - self.vector_store = FAISS.from_texts(chunks, embedding=self.sim_model, normalize_L2=True, distance_strategy="COSINE") - - def search(self, query, k): - embed_q = self.sim_model.embed_query(query) - self.context = self.vector_store.similarity_search_with_score_by_vector(embed_q, k) - - def format_prompt(self, query, k): - self.search(query, k) - prompt = self.sys_prompt + "\n" + "Question:" - prompt += query - prompt += "\n" - prompt += "Context:" - for i in range(k): - prompt += f"{self.context[i]}\n" - return prompt - - def generation(self, query_data): - res = "" - if self.use_platform: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", "content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - outputs = self.llm.invoke(messages) - response = outputs.content - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - else: - for key, query in query_data.items(): - new_prompt = self.format_prompt(query, 5) - messages = [{"role": "system", 
"content": self.sys_prompt}, {"role": "user", "content": new_prompt}] - input_ids = self.tokenizer.apply_chat_template( - messages, - add_generation_prompt=True, - return_tensors="pt" - ) - outputs = self.llm.generate( - input_ids, - max_new_tokens=1024, - eos_token_id=self.terminators, - do_sample=True, - temperature=self.temperature, - top_p=0.9, - ) - response = self.tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True) - temp_res = { - "question_category": key, - "query": query, - "generation": clean_gen(response), - "evidence": [] - } - for i in range(len(self.context)): - context = self.context[i][0].page_content - sim_score = float(self.context[i][1]) - temp_res["evidence"].append({"pdf_reference": context, "similarity_score": sim_score}) - self.context_result["result"].append(temp_res) - res += response - res += "\n" - self.result = clean_gen(res) - - def save_context(self): - # print(self.context_result) - with open(self.context_file_path, "w") as f: - json.dump(self.context_result, f) - print(f"RAG context is saved at: {self.context_file_path}") - - -def run_bench(input_folder, output_folder): - dir_list = os.listdir(input_folder) - for i in range(len(dir_list)): - args_dict = { - "use_platform": str2bool("False"), - "user_key": "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", - "llm_id": "meta-llama/Llama-3.2-3B-Instruct", - "hf_key": "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", - "llm_platform": "groq", - "sim_model_id": "Salesforce/SFR-Embedding-Mistral", - "input_file_path": "", - "prompt_file_pdf": "/home/jovyan/Solar/CLI/data/prompts.json", - "context_file_path": "" - } - input_dir = input_folder + f"paper_{i+1}.json" - output_dir = output_folder + f"result_{i+1}.json" - # result_list = os.listdir(output_folder) - if os.path.exists(input_dir): - if os.path.exists(output_dir): - pass - else: - args_dict["input_file_path"] = input_dir - args_dict["context_file_path"] = output_dir - print(args_dict) - prompt_file_pdf = 
args_dict["prompt_file_pdf"] - del args_dict["prompt_file_pdf"] - start_time = time.time() - solar = SolarQA(**args_dict) - print("--- %s Data Preparation and Model Loading time consumption: seconds ---" % (time.time() - start_time)) - temp_time = time.time() - with open(prompt_file_pdf, "rb") as f: - query_data = json.load(f) - solar.generation(query_data=query_data) - print(solar.result) - print("--- %s Model generation time consumption: seconds ---" % (time.time() - temp_time)) - solar.save_context() - - - - -input_folder = "/home/jovyan/Solar/data/paper_all/" -output_folder = "/home/jovyan/Solar/CLI/context/" -run_bench(input_folder, output_folder) - - - \ No newline at end of file diff --git a/CLI/code/run_factrag.sh b/CLI/code/run_factrag.sh deleted file mode 100644 index 4b838f9..0000000 --- a/CLI/code/run_factrag.sh +++ /dev/null @@ -1,29 +0,0 @@ -# !/bin/bash - -llm_id='llama3.2:3b' -embedding_id='nomic-embed-text' -prompt_file='/home/jovyan/GraphRAG/self_developed/prompt.json' - -# Define the target directory -input_directory="/home/jovyan/GraphRAG/self_developed/10_bench" -output_directory="/home/jovyan/GraphRAG/self_developed/context/" - - -if [ ! -d "$input_directory" ]; then - exit 1 -fi - -for file in "$input_directory"/*; do - if [ -f "$file" ]; then - var=$(echo "$file" | cut -d "_" -f 4) - ind=$(echo "$var" | cut -d "." 
-f 1) - output_file="${output_directory}result_${ind}.json" - if [ -f "$output_file" ]; then - echo "file exist" - else - python FactsRAG.py --llm_id "$llm_id" --embedding_id "$embedding_id" --input_file_path "$file" --prompt_file "$prompt_file" --context_file_path "$output_file" - echo $output_file - fi - fi -echo "DONE" -done \ No newline at end of file diff --git a/CLI/code/run_grobid.sh b/CLI/code/run_grobid.sh deleted file mode 100644 index 5e329cc..0000000 --- a/CLI/code/run_grobid.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# # download GROBID if directory does not exist -declare -r GROBID_VERSION="0.8.0" # or change to current stable version 0.6.2 - -# if [ ! -d grobid-${GROBID_VERSION} ]; then -# wget https://github.com/kermitt2/grobid/archive/${GROBID_VERSION}.zip -# unzip "${GROBID_VERSION}.zip" -# rm "${GROBID_VERSION}.zip" -# fi - - -# run GROBID -cd ~ -cd /home/jovyan/Solar/CLI/grobid-0.8.0 -./gradlew run \ No newline at end of file diff --git a/CLI/code/setting/config.json b/CLI/code/setting/config.json deleted file mode 100644 index 789c3fa..0000000 --- a/CLI/code/setting/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "grobid_server": "http://localhost:8070", - "batch_size": 1000, - "sleep_time": 5, - "timeout": 60, - "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] -} diff --git a/CLI/code/settings/config.json b/CLI/code/settings/config.json deleted file mode 100644 index 789c3fa..0000000 --- a/CLI/code/settings/config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "grobid_server": "http://localhost:8070", - "batch_size": 1000, - "sleep_time": 5, - "timeout": 60, - "coordinates": ["persName", "figure", "ref", "biblStruct", "formula", "s"] -} From d00f504b6a60528accda68f7a118a221cf63ceae Mon Sep 17 00:00:00 2001 From: Clark Wang <107419732+OEG-Clark@users.noreply.github.com> Date: Fri, 13 Dec 2024 11:04:20 +0100 Subject: [PATCH 4/4] Add files via upload --- CLI/README.md | 42 +++ CLI/Result_Example/SolarFactRAG_Example.json 
| 1 + CLI/Result_Example/SolarNavieRAG_Example.json | 1 + CLI/Result_Spec.md | 78 +++++ CLI/code/FactsRAG.py | 280 ++++++++++++++++++ CLI/code/process.py | 130 ++++++++ CLI/code/prompt.json | 9 + CLI/code/run_factrag.sh | 29 ++ CLI/code/settings/config.json | 7 + 9 files changed, 577 insertions(+) create mode 100644 CLI/README.md create mode 100644 CLI/Result_Example/SolarFactRAG_Example.json create mode 100644 CLI/Result_Example/SolarNavieRAG_Example.json create mode 100644 CLI/Result_Spec.md create mode 100644 CLI/code/FactsRAG.py create mode 100644 CLI/code/process.py create mode 100644 CLI/code/prompt.json create mode 100644 CLI/code/run_factrag.sh create mode 100644 CLI/code/settings/config.json diff --git a/CLI/README.md b/CLI/README.md new file mode 100644 index 0000000..1ff6457 --- /dev/null +++ b/CLI/README.md @@ -0,0 +1,42 @@ +# SolarRAG CLI + +> **_Solar CLI:_** This CLI contains the SolarFactRAG and SolarNaiveRAG pipelines for the [solar-qa pipeline](https://github.com/oeg-upm/solar-qa-eval) + +## Install + +#### 1. install all python packages + +```console +pip install ollama networkx leidenalg cdlib python-igraph python-dotenv langchain huggingface_hub langchain-ollama==0.1.0 langchain-community==0.2.19 sentence-transformers==2.7.0 grobid-client-python==0.0.3 +``` + +#### 2. install ollama + +For more details about ollama, please visit the [official ollama website](https://ollama.com/) + +```console +curl -fsSL https://ollama.com/install.sh | sh +``` + +#### 3. 
install grobid client + +For this step, please refer to the [official grobid installation page](https://grobid.readthedocs.io/en/latest/Install-Grobid/) + +## Run Solar CLI + +After installing all prerequisite libraries and software, you can run the CLI with: + +```console +python SolarRAG.py --llm_id llama3.2:3b --embedding_id nomic-embed-text --input_file_path XXX/paper_2.pdf --prompt_file XXX/prompt.json --context_file_path XXX/test.json --rag_type fact +``` + +Here is a table that describes the parameters used to run the FactRAG CLI + +| Parameter | Definition | DataType | Example | +| -------- | ------- | ------- | ------- | +| llm_id | which LLM model from ollama to use | String | [llama3.2:3b](https://ollama.com/library/llama3.2) | +| embedding_id | which embedding model from ollama to use | String | [nomic-embed-text](https://ollama.com/library/nomic-embed-text) | +| input_file_path | path to the input data, either a pdf file or an extracted json file | String | ../paper_1.pdf | +| prompt_file | path to the prompt json file | String | ../code/prompt.json | +| context_file_path | path where the output json file is saved | String | ../FactRAG/context.json | +| rag_type | the type of rag pipeline, range=['fact', 'naive'] | String | fact | \ No newline at end of file diff --git a/CLI/Result_Example/SolarFactRAG_Example.json b/CLI/Result_Example/SolarFactRAG_Example.json new file mode 100644 index 0000000..6eac6b7 --- /dev/null +++ b/CLI/Result_Example/SolarFactRAG_Example.json @@ -0,0 +1 @@ +{"paper_title": "Photocatalytic Reduction of Greenhouse Gas CO2 to Fuel", "DOI": "10.1007/s10563-009-9065-9", "generation_model": "llama3.2:3b", "similarity_model": "nomic-embed-text", "similarity_metric": "Cosine_Similarity", "rag_type": "fact", "result": [{"question_category": "catalyst", "query": "What is the chemical name of the catalyst used in the experiment?", "generation": "###catalyst:\nTTIP (Titanium(IV) Isopropoxide).", "RAG_source": "generated_facts", 
"selected_answer": {"catalyst": "\nTTIP (Titanium(IV) Isopropoxide)."}, "evidences": [{"similairty_score": 0.6745214462280273, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.6460967063903809, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed.", "generated_facts": "Facts:\n1. Hydrocarbons are produced by Methane.\n2. Hydrocarbons are produced by Ethylene.\n3. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Methane.\n4. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Ethylene.\n5. The Dye-sensitized Cu-Fe/P25 photocatalyst harnesses Light energy.\n6. The Dye-sensitized Cu-Fe/P25 photocatalyst is enhanced by Sunlight.\n7. CO2 (carbon dioxide) is photoreduced by In situ IR spectroscopy.\n8. Methane is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst.\n9. Ethylene is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst."}, {"similairty_score": 0.6339464783668518, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. 
Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.6232354044914246, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced", "generated_facts": "Facts: \n1. The optical-fiber photoreactor is designed to be coated with Ag/TiO\u2082 photocatalyst-coated fibers.\n2. The optical-fiber photoreactor comprises nearly 120 Ag/TiO\u2082 photocatalyst-coated fibers.\n3. The photocatalyst-coated fibers are coated with Anatase phase.\n4. All photocatalysts contain the Anatase phase.\n5. Increasing UV light intensity increases methanol yield.\n6. The UV light intensity is set to 10 W/cm\u00b2.\n7. The Ag/TiO\u2082 photocatalyst is used under a light intensity of 10 W/cm\u00b2.\n8. The TiO\u2082-SiO\u2082 mixed oxide is doped with Cu metal and Fe metal.\n9. Photocatalysts exhibit substantial differences in hydrocarbon production.\n10. Hydrocarbon production is affected by product selectivity.\n11. Product selectivity is influenced by Cu metal and Fe metal.\n12. The TiO\u2082-SiO\u2082 mixed oxide doped with Cu and Fe metals produces methane.\n13. 
The TiO\u2082-SiO\u2082 mixed oxide doped with Cu and Fe metals also produces ethylene."}, {"similairty_score": 0.6223254799842834, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}]}, {"question_category": "co_catalyst", "query": "What is the chemical name of the co_catalyst used in the experiment?", "generation": "###co_catalyst:\nacac", "RAG_source": "generated_facts", "selected_answer": {"co_catalyst": "\nacac"}, "evidences": [{"similairty_score": 0.65687096118927, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. 
The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.6408281922340393, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed.", "generated_facts": "Facts:\n1. Hydrocarbons are produced by Methane.\n2. Hydrocarbons are produced by Ethylene.\n3. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Methane.\n4. The Cu-Fe loaded TiO2-SiO2 photocatalyst produces Ethylene.\n5. The Dye-sensitized Cu-Fe/P25 photocatalyst harnesses Light energy.\n6. The Dye-sensitized Cu-Fe/P25 photocatalyst is enhanced by Sunlight.\n7. CO2 (carbon dioxide) is photoreduced by In situ IR spectroscopy.\n8. Methane is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst.\n9. Ethylene is produced on the Cu-Fe loaded TiO2-SiO2 photocatalyst."}, {"similairty_score": 0.6391236186027527, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was", "generated_facts": "Facts: \n1. Thermocouple is placed at Reactor.\n2. Reactor was purged by CO2 gas.\n3. Reactor was purged by Distilled water.\n4. CO2 gas bubbled through Distilled water.\n5. Space velocity of CO2 gas and H2O vapor.\n6. Space velocity of CO2 gas and H2O vapor was maintained at nearly 0.72 h-1.\n7. Photo reactions were carried out at steady state by Reactor.\n8. Photo reactions lasted for 6-24 h.\n9. Catalysts were repeated two or three times on Reactor.\n10. Catalysts deactivation was found to be negligible.\n11. Outlet gases were analyzed by GC equipment.\n12. GC equipment was equipped with FID and Porapak Q column.\n13. TiO2 underwent thermal-hydrolysis.\n14. CO2 was photoreduced on Thermal-hydrolyzed TiO2."}, {"similairty_score": 0.6241745948791504, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. 
P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.6187817454338074, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}]}, {"question_category": "Light_source", "query": "From the choices provided, which light source is used in the experiment?", "generation": "###Light_source: 'Solar'", "RAG_source": "generated_facts", "selected_answer": {"Light_source": " 'Solar'"}, "evidences": [{"similairty_score": 0.6323468089103699, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. Temperature controllers control temperature."}, {"similairty_score": 0.6295710802078247, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 
3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar", "generated_facts": "Facts:\n1. Light is transmitted by a fiber-supported TiO2 film.\n2. A fiber-supported TiO2 film is inserted into fibers.\n3. Fibers are supported on circular plates.\n4. Circular plates have a diameter of 5.0 cm.\n5. An OFPR module is irradiated with artificial light or concentrated natural sunlight.\n6. Artificial light is supplied by the Exfo Acticure 4,000.\n7. The Exfo Acticure 4,000 uses an appropriate color filter.\n8. Light intensity can be measured with a Lumen meter (Exfo).\n9. Solar irradiates an OFPR module.\n10. Concentrated natural sunlight enters through a quartz window.\n\nInferred Facts:\n11. TiO2 is used in a fiber-supported TiO2 film.\n12. An OFPR processes and repairs fibers.\n13. A quartz window allows concentrated natural sunlight to enter."}, {"similairty_score": 0.629106342792511, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. 
The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.6268292665481567, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. 
CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.6255613565444946, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. 
The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}]}, {"question_category": "Lamp", "query": "From the choices provided, which lamp is used in the experiment?", "generation": "###Lamp: 'LED'", "RAG_source": "generated_facts", "selected_answer": {"Lamp": " 'LED'"}, "evidences": [{"similairty_score": 0.6355500221252441, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.6323181986808777, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}, {"similairty_score": 0.6160007119178772, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. 
CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.6134966611862183, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. 
The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.6098795533180237, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar", "generated_facts": "Facts:\n1. Light is transmitted by a fiber-supported TiO2 film.\n2. A fiber-supported TiO2 film is inserted into fibers.\n3. Fibers are supported on circular plates.\n4. Circular plates have a diameter of 5.0 cm.\n5. An OFPR module is irradiated with artificial light or concentrated natural sunlight.\n6. Artificial light is supplied by the Exfo Acticure 4,000.\n7. The Exfo Acticure 4,000 uses an appropriate color filter.\n8. Light intensity can be measured with a Lumen meter (Exfo).\n9. 
Solar irradiates an OFPR module.\n10. Concentrated natural sunlight enters through a quartz window.\n\nInferred Facts:\n11. TiO2 is used in a fiber-supported TiO2 film.\n12. An OFPR processes and repairs fibers.\n13. A quartz window allows concentrated natural sunlight to enter."}]}, {"question_category": "Reaction_medium", "query": "From the choices provided, which reaction medium is used in the experiment?", "generation": "###Reaction_medium: Liquid", "RAG_source": "generated_facts", "selected_answer": {"Reaction_medium": " Liquid"}, "evidences": [{"similairty_score": 0.6295759081840515, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. 
The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.5933018922805786, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. Temperature controllers control temperature."}, {"similairty_score": 0.5931262969970703, "pdf_reference": "0 -bipyridyl-4,4 0 -dicarboxylate) 2 -(NCS) 2 (also called N3-dye, Solaronix) was used as a dye sensitizer that was dissolved in ethanol (99.5) to obtain a 3 mM dye solution. P25 slurry with corresponding metal salts was prepared by adding aqueous PEG solution with the metal salts to P25 TiO2 fiber Fig. 
2 The schematic of light transmission and spread of TiO 2 coated optical fiber powder in a mortar under vigorous grinding with pestle 36.The prepared uniform lump-free slurry was coated on optical fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding", "generated_facts": "Facts:\n1. -bipyridyl-4,4'-dicarboxylate is used as a chemical compound.\n2. NCS is used as a chemical compound.\n3. P25 TiO2 fiber is coated with P25 TiO2.\n4. Metal salts are added to PEG solution to prepare slurry.\n5. Solaronix (N3-dye) is used as a dye.\n6. Ethanol (99.5%) is used as a solvent for the dye solution.\n7. Optical fibers are coated using dip-coating method.\n8. Grinding is performed using mortar and pestle.\n9. P25 TiO2 fiber is used as a photocatalyst material."}, {"similairty_score": 0.5924503207206726, "pdf_reference": "The TiO 2 solutions to be on optical fibers were prepared by the thermal hydrolysis method.Titanium (IV) butoxide and polyethylene glycol (PEG, molecular weight of 20,000, Merck, Darmstadt, Germany) were added to a 0.1 M nitric acid solution.The volume ratio of titanium butoxide to HNO 3 was 1:6, and the weight of PEG was 50 of that of TiO 2 .The appearance of pure TiO 2 solution was milky and slightly transparent.PEG was added to prevent cracking during the drying and calcination of the film.Furthermore, it tends to increase the viscosity of the solution as well as the uniformity TiO 2", "generated_facts": "Facts:\n1. Titanium (IV) butoxide has component TiO2.\n2. TiO2 will contain TiO2 solution.\n3. Polyethylene glycol (PEG) has molecular weight 20,000.\n4. Merck is obtained from PEG.\n5. Nitric acid solution is added to TiO2 with a volume ratio of 1:6 to Titanium (IV) butoxide.\n6. TiO2 solution has concentration of nitric acid in M.\n7. Thermal hydrolysis method involves hydrolysis reaction.\n8. TiO2 solutions are prepared by thermal hydrolysis method.\n9. 
Film formation involves drying and calcination of TiO2 solution.\n10. Drying is part of film formation process for TiO2 solution.\n11. Calcination is part of film formation process for TiO2 solution.\n12. Optical fibers will contain TiO2 solution.\n13. Titanium (IV) butoxide is added to 0.1 M nitric acid solution."}, {"similairty_score": 0.5891863703727722, "pdf_reference": "fibers by dip-coating method.Dye-adsorbed photocatalyst was obtained by dipping the corresponding photocatalyst into the 3 mM dye solution for 24 h.Subsequently, dye-adsorbed photocatalyst was rinsed with ethanol (99.5) to obtain a monolayer of dye on the photocatalyst surface.The resulting dye-adsorbed photocatalyst was finally put in an oven at 80 C for 30 min to remove the residue solvent on the surface of the photocatalyst. An optical-fiber photoreactor (OFPR) was designed and assembled to transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig.", "generated_facts": "Facts:\n1. Fibers are obtained by dip-coating method.\n2. Dye-adsorbed photocatalyst is obtained by dipping into 3 mM dye solution for 24 h.\n3. Dye-adsorbed photocatalist is rinsed with ethanol (99.5%).\n4. Dye-adsorbed photocatalist is put in an oven at 80 C for 30 min to remove residue solvent on the surface of the photocatalyst.\n5. Optical-fiber photoreactor (OFPR) is designed and assembled to transmit light to TiO 2 film.\n6. Photocatalyst is dipped into 3 mM dye solution for 24 h to obtain dye-adsorbed photocatalyst.\n7. Dye-adsorbed photocatalist is put in an oven at 80 C for 30 min to remove residue solvent on the surface of the photocatalyst (same as relationship 4).\n8. 
TiO 2 film transmits light from one side of the OFPR module to optical-fiber photoreactor (OFPR)."}]}, {"question_category": "Reactor_type", "query": "From the choices provided, which reactor type is used in the experiment?", "generation": "###Reactor_type: Fluidised-bed", "RAG_source": "generated_facts", "selected_answer": {"Reactor_type": " Fluidised-bed"}, "evidences": [{"similairty_score": 0.6250825524330139, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.5985174775123596, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was", "generated_facts": "Facts: \n1. Thermocouple is placed at Reactor.\n2. Reactor was purged by CO2 gas.\n3. Reactor was purged by Distilled water.\n4. CO2 gas bubbled through Distilled water.\n5. Space velocity of CO2 gas and H2O vapor.\n6. Space velocity of CO2 gas and H2O vapor was maintained at nearly 0.72 h-1.\n7. Photo reactions were carried out at steady state by Reactor.\n8. Photo reactions lasted for 6-24 h.\n9. Catalysts were repeated two or three times on Reactor.\n10. Catalysts deactivation was found to be negligible.\n11. Outlet gases were analyzed by GC equipment.\n12. GC equipment was equipped with FID and Porapak Q column.\n13. TiO2 underwent thermal-hydrolysis.\n14. 
CO2 was photoreduced on Thermal-hydrolyzed TiO2."}, {"similairty_score": 0.5907428860664368, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and", "generated_facts": "Facts:\n1. The Sun is the source of energy for Earth.\n2. CO2 can be converted into hydrocarbons using a photocatalyst in an optical fiber reactor.\n3. CO2 can be converted into methanol using a photocatalyst in an optical fiber reactor.\n4. CO2 can be converted into methane using a photocatalist in an optical fiber reactor.\n5. CO2 can be converted into ethylene using a photocatalyst in an optical fiber reactor.\n6. The Sun provides energy for the photoreactor used to convert CO2 into methanol, hydrocarbons, and other products.\n7. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n8. Artificial light is not mentioned as being used in a photoreactor.\n9. The Sun's energy is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n10. Real sunlight is used in the photoreactor that converts CO2 into methanol, hydrocarbons, and other products.\n11. Fiber can be coated with a photocatalyst to enhance its conversion efficiency.\n12. 
The optical-fiber photoreactor consists of nearly 120 photocatalyst-coated fibers.\n\nNote: I've tried to rephrase the relationships in a more natural-sounding way while maintaining their original meaning."}, {"similairty_score": 0.5826471447944641, "pdf_reference": "equipped with FID and porapak Q column. The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was studied using diffusive reflectance infrared Fourier transform (DRIFT) under UV irradiation.Zero-grade air, high-purity He and ultra-purity CO 2 (99.999 v), from Air Products (USA) were used in the IR system.In order to reduce water interference, air and He were passed through a moisture trapper before entering the photoreactor. A high-temperature chamber (HVC, Harrick HVC-DRP-1) made of 316 stainless steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were", "generated_facts": "Here are the simple fact sentences constructed by combining the given relationships:\n\nFacts:\n1. The Fused Inertial Detector is equipped with a Porapak Q column.\n2. Titanium Dioxide was studied on Carbon Dioxide using DRIFT spectroscopy.\n3. Air Products supplied ultra-purity CO2 (99.999 v) for use in the experiment.\n4. Helium passed through a moisture trapper before entering the photoreactor.\n5. Zero-grade air passed through a moisture trapper before entering the photoreactor.\n6. High-purity helium was used in the experiment.\n7. The HVC-DRP-1 is made of 316 stainless steel.\n8. The photoreactor is used as a high-temperature chamber, Harrick model.\n9. The IR system is used in the photoreduction of CO2 on TiO2.\n10. 
Air was passed through a moisture trapper before entering the photoreactor.\n\nNote that I did not include relationship 13 as it is an implicit repetition of relationship 7 and does not provide any new information."}, {"similairty_score": 0.5698617696762085, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}]}, {"question_category": "Operation_mode", "query": "From the choices provided, which oepration mode is used in the experiment?", "generation": "###Operation_mode: Batch/Continuous", "RAG_source": "generated_facts", "selected_answer": {"Operation_mode": " Batch/Continuous"}, "evidences": [{"similairty_score": 0.5681795477867126, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which", "generated_facts": "Facts: \n1. Steel is used in photoreactors.\n2. The HVC dome has three windows.\n3. KBr windows are transparent to both IR and UV light for IR transmittance.\n4. Quartz windows are transparent in the UV region but only partly transparent in the IR region.\n5. Photoreactors are located inside compartments of FTIR instruments.\n6. Pressure transducers measure pressure.\n7. Bubble flow meters measure gas flow rates.\n8. K-type thermocouples are connected to sample cups.\n9. Electric heaters are connected to sample cups.\n10. 
Temperature controllers control temperature."}, {"similairty_score": 0.5528874397277832, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel", "generated_facts": "Facts:\n1. The volume of the chelating agent is unknown.\n2. The volume of solvent used in this experiment is 15 mL.\n3. The solution contains ethanol.\n4. The solution contains isopropanol.\n5. TTIP was diluted in 35 mL of solvent.\n6. TTIP was modified by acac.\n7. The liquid resulting from the chelating agent and solvent mixture has an unknown color.\n8. The temperature set to in this experiment is ambient temperature.\n9. The solution denoted as modified TTIP resulted from diluting TTIP with solvent.\n10. Hydrochloric acid was added to TEOS solution.\n11. De-ionized water was used in 10 mL of solvent.\n12. TEOS was mixed with hydrochloric acid and de-ionized water.\n13. A dropping funnel was used to add a hydrolysant to the TEOS solution.\n14. The chelating agent (acac) affects the color of the liquid.\n15. The solvent (50 v/v of ethanol and isopropanol) affects the color of the liquid.\n16. TTIP has a deep-yellow transparent liquid color when chelated with acac.\n17. 
The solution denoted as modified TTIP was cooled to ambient temperature for 1 hour."}, {"similairty_score": 0.5436047315597534, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption", "generated_facts": "Facts:\n1. The electric heater is connected to a sample cup.\n2. The temperature controller is directly connected to a thermocouple and a heater.\n3. The HVC has a thermocouple and a heater.\n4. The EXFO manufactured the OmniCure 1000.\n5. The OmniCure 1000 is a type of UV light source.\n6. The 100 W Hg lamp is a type of UV light source.\n7. The optical cable is connected to a quartz window.\n8. The quartz window has UV light (light from the UV light source).\n9. The TiO2 film absorbs light with wavelength below 380 nm."}, {"similairty_score": 0.5275892615318298, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The", "generated_facts": "Facts:\n1. 
The Lumen meter measures light intensity.\n2. The solar concentrator tracks the sun's trajectory in Japan.\n3. The solar concentrator collects natural sunlight.\n4. The reflection dish of the solar concentrator also tracks the sun's trajectory.\n5. Optical cables transmit concentrated sunlight.\n6. Windows receive focused sunlight from the optical cables.\n7. Photoreactors contain windows that receive sunlight.\n8. Heating tapes wrap around photoreactors to regulate temperature.\n9. Temperature controllers control the heating tape temperature.\n10. Thermocouples measure reactor temperatures.\n11. Days have maximum sunlight intensity.\n12. The solar concentrator is used to collect natural sunlight in Japan.\n13. Lumen meters can be used to tune and measure light intensity.\n\nNote: I've rephrased some of the relationships to make them more concise and clear, while maintaining their original meaning."}, {"similairty_score": 0.524968683719635, "pdf_reference": "Experimental", "generated_facts": "Let's construct some simple fact sentences using the provided relationships.\n\nSince we're working with a single entity, I'll use the first option: (Experimental, is, ). This implies that Experimental is a thing or concept that exists on its own.\n\nHere are a few examples of fact sentences:\n\n1. The experimental process is ongoing.\n2. Experimental methods have been used in various fields.\n3. The term \"experimental\" refers to a type of scientific inquiry.\n\nNow, let's try the second option: (Experimental, related to, ). This implies that Experimental has some connection or association with something else.\n\nHere are a few examples of fact sentences:\n\n1. Experimental results often require statistical analysis.\n2. The experimental approach is distinct from traditional methods.\n3. 
Experimental design plays a crucial role in scientific research.\n\nPlease let me know if you'd like to explore more relationships or provide additional context!"}]}]} \ No newline at end of file diff --git a/CLI/Result_Example/SolarNavieRAG_Example.json b/CLI/Result_Example/SolarNavieRAG_Example.json new file mode 100644 index 0000000..7551849 --- /dev/null +++ b/CLI/Result_Example/SolarNavieRAG_Example.json @@ -0,0 +1 @@ +{"paper_title": "Photocatalytic Reduction of Greenhouse Gas CO2 to Fuel", "DOI": "10.1007/s10563-009-9065-9", "generation_model": "llama3.2:3b", "similarity_model": "nomic-embed-text", "similarity_metric": "Cosine_Similarity", "rag_type": "naive", "result": [{"question_category": "catalyst", "query": "What is the chemical name of the catalyst used in the experiment?", "generation": "###catalyst: Titanium Dioxide (TiO2)", "RAG_source": "chunks", "selected_answer": {"catalyst": " Titanium Dioxide (TiO2)"}, "evidences": [{"similairty_score": 0.6278509497642517, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.6265729069709778, "pdf_reference": "Article_Title\nPhotocatalytic Reduction of Greenhouse Gas CO2 to Fuel\nAbstract"}, {"similairty_score": 0.6251746416091919, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted 
deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}, {"similairty_score": 0.6206226944923401, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}, {"similairty_score": 0.6131582260131836, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed."}]}, {"question_category": "co_catalyst", "query": "What is the chemical name of the co_catalyst used in the experiment?", "generation": "###co_catalyst: Cu-Fe", "RAG_source": "chunks", "selected_answer": {"co_catalyst": " Cu-Fe"}, "evidences": [{"similairty_score": 0.6458245515823364, "pdf_reference": "Article_Title\nPhotocatalytic Reduction of Greenhouse Gas CO2 to Fuel\nAbstract"}, {"similairty_score": 0.6371890902519226, "pdf_reference": "Sun is 
the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.626099705696106, "pdf_reference": "in hydrocarbon production as well as product selectivity.Methane and ethylene were produced on Cu-Fe loaded TiO 2 -SiO 2 photocatalyst.Since dye-sensitized Cu-Fe/P25 photocatalyst can fully harvest the light energy of 400-800 nm from sunlight, its photoactivity was significantly enhanced.Finally, CO 2 photoreduction was studied by in situ IR spectroscopy and possible mechanism for the photoreaction was proposed."}, {"similairty_score": 0.6174551248550415, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was"}, {"similairty_score": 0.6073813438415527, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}]}, {"question_category": "Light_source", "query": "From the choices provided, which light source is used in the experiment?", "generation": "###Light_source: UV", "RAG_source": "chunks", "selected_answer": {"Light_source": " UV"}, "evidences": [{"similairty_score": 0.6839399337768555, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.6214293837547302, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The 
photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.6177785396575928, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.6095446944236755, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.6055726408958435, "pdf_reference": "transmit light to the fiber-supported TiO 2 film from one side of the OFPR module, as shown in Fig. 
3.The fibers with length of nearly 11 cm long were inserted into the OFPR.The optical fibers were supported on circular plates with a diameter of 5.0 cm.The OFPR was irradiated by artificial light or concentrated natural sunlight through the quartz window at one side.The artificial light (320-500 nm) was supplied by Exfo Acticure 4,000 with a highest intensity at 365 nm using an appropriate color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar"}]}, {"question_category": "Lamp", "query": "From the choices provided, which lamp is used in the experiment?", "generation": "###Lamp: 'Mercury-Xenon'", "RAG_source": "chunks", "selected_answer": {"Lamp": " 'Mercury-Xenon'"}, "evidences": [{"similairty_score": 0.6403747797012329, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.6216245889663696, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on 
the sample cup.A temperature controller, which"}, {"similairty_score": 0.601848304271698, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.59084552526474, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5811028480529785, "pdf_reference": "Experimental"}]}, {"question_category": "Reaction_medium", "query": "From the choices provided, which reaction medium is used in the experiment?", "generation": "###Reaction_medium: Liquid", "RAG_source": "chunks", "selected_answer": {"Reaction_medium": " Liquid"}, "evidences": [{"similairty_score": 0.6020915508270264, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory 
during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5890271067619324, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5843930244445801, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5805729031562805, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a 
colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}, {"similairty_score": 0.5772931575775146, "pdf_reference": "optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and assembled.The XRD spectra indicated the anatase phase for all photocatalysts.It is found that the methanol yield increased with UV light intensity.A maximum methanol yield of 4.12 lmole/g-cat h is obtained when 1.0 wt Ag/TiO 2 photocatalyst was used under a light intensity of 10 W/cm 2 .When mixed oxide, TiO 2 -SiO 2 , is doped with Cu and Fe metals, the resulting photocatalysts show substantial difference in hydrocarbon production as well as product selectivity.Methane and ethylene were produced"}]}, {"question_category": "Reactor_type", "query": "From the choices provided, which reactor type is used in the experiment?", "generation": "###Reactor_type: Optical Fiber", "RAG_source": "chunks", "selected_answer": {"Reactor_type": " Optical Fiber"}, "evidences": [{"similairty_score": 0.6050081253051758, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5929175615310669, "pdf_reference": "color filter.The light 
intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5833992958068848, "pdf_reference": "Sun is the Earth's ultimate and inexhaustible energy source.One of the best routes to remedy the CO 2 problem is to convert it to valuable hydrocarbons using solar energy.In this study, CO 2 was photocatalytically reduced to produce methanol, methane and ethylene in a steady-state optical-fiber reactor under artificial light and real sunlight irradiation.The photocatalyst was dip-coated on the optical fibers that enable the light to transmit and spread uniformly inside the reactor.The optical-fiber photoreactor, comprised of nearly 120 photocatalyst-coated fibers, was designed and"}, {"similairty_score": 0.5747902989387512, "pdf_reference": "with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The reactor was purged by CO 2 gas bubbling through distilled water for 1 h before the reaction.The space velocity of CO 2 gas and H 2 O vapor was maintained at nearly 0.72 h -1 .The photo reactions were carried out at steady state and lasted for 6-24 h.Some reactions were repeated two or three times, and the deactivation of catalysts was found to be negligible.The outlet gases were analyzed by a GC equipped with FID and porapak Q column. 
The photoreduction of CO 2 on thermal-hydrolyzed TiO 2 was"}, {"similairty_score": 0.5674387216567993, "pdf_reference": "Experimental"}]}, {"question_category": "Operation_mode", "query": "From the choices provided, which oepration mode is used in the experiment?", "generation": "###Operation_mode: 'Continuous'", "RAG_source": "chunks", "selected_answer": {"Operation_mode": " 'Continuous'"}, "evidences": [{"similairty_score": 0.6241655349731445, "pdf_reference": "Experimental"}, {"similairty_score": 0.5862252116203308, "pdf_reference": "color filter.The light intensity could be tuned and measured with a Lumen meter (Exfo).A solar concentrator (Himawari, Japan) was used to collect natural sunlight.The reflection dish of the solar concentrator is able to track the sun trajectory during a day so that the maximum sunlight intensity can be obtained.The concentrated sunlight is transmitted via an optical cable and focused on the window of the photoreactor.The reactor was wrapped with a heating tape connected to temperature controller with a thermocouple placed at the center of the reactor to maintain the reaction temperature.The"}, {"similairty_score": 0.5714736580848694, "pdf_reference": "steel was used as the photoreactor.There were three windows on the HVC dome.Two of them were KBr windows that were transparent to both IR and UV light for IR transmittance.The third one for UV irradiation was a quartz window that was transparent in the UV region but only partly transparent in the IR region.The photoreactor was located inside the compartment of the FTIR instrument.Pressure and gas flow rate were measured by a pressure transducer and a bubble flow meter, respectively.A K-type thermocouple and an electric heater were connected on the sample cup.A temperature controller, which"}, {"similairty_score": 0.5229113101959229, "pdf_reference": "and an electric heater were connected on the sample cup.A temperature controller, which was directly connected to the thermocouple and 
heater of the HVC, provided accurate temperature control (1 C).The HVC can be heated up to 600 C.The UV light source (EXFO, OmniCure 1000) was supplied by a 100 W Hg lamp with filter to give light with wavelength of 365 nm only.The UV light was led to the quartz window of the HVC by an optical cable.The detailed description of the system can be found in the literature 37.Clearly, TiO 2 film absorbs light of wavelength below 380 nm.The UV-visible absorption"}, {"similairty_score": 0.49779558181762695, "pdf_reference": "corresponding volume of chelating agent in 15 mL of solvent (50 v/v of ethanol and isopropanol).The second one was Tetraisopropoxytitanium (TTIP, Merck) diluted in 35 mL of solvent.The resulted deep-yellow transparent liquid (when the acac as a chelating agent was used) or a colorless transparent liquid (without chelating agent) was cooled to ambient temperature for 1 h.The solution is denoted as modified TTIP. The hydrolysant (35 hydrochloric acid and the amount of de-ionized water used in 10 mL of solvent) was added to tetraethyl orthosilicate (TEOS, Merck) solution via a dropping funnel"}]}]} \ No newline at end of file diff --git a/CLI/Result_Spec.md b/CLI/Result_Spec.md new file mode 100644 index 0000000..0b0b001 --- /dev/null +++ b/CLI/Result_Spec.md @@ -0,0 +1,78 @@ +# SolarRAG-CLI Result Specification + +## Result Specification Table + +Here is the table that describe the saved output json file: + +| Category | Definition | DataType | Example | +| -------- | ------- | ------- | ------- | +| paper_title | The title of the paper | String | Effect of silver doping on the TiO2 for photocatalytic reduction of CO2 | +| DOI | The DOI of the paper(extracted from the paper pdf file) | String | 10.1016/j.apcatb.2010.02.030 | +| generation_model | The ollama model id for llm generation | String | llama3.2:3b | +| similarity_model | The ollama model id for text embedding | String | nomic-embed-text | +| similarity_metric | The metric for calculating the 
similarity between embeddings | String | Cosine_Similarity | +| rag_type | The type of RAG pipeline, range=['fact', 'naive'] | String | fact | +| result | The list of generation results from the llm | List | Details given below | + +The details for `result` part of the saved output json file: + +| Category | Definition | DataType | Example | Range(if applicable) | +| -------- | ------- | ------- | ------- | ------- | +| question_category | The category of the question for the llm | String | Light_source | [catalyst, co_catalyst, Light_source, Lamp, Reaction_medium, Reactor_type, Operation_mode] | +| query | The query for the llm | String | What is the Light_source used in the experiment? | Not applicable | +| generation | The generated answer from the llm | String | ###Light_source: UV | Not applicable | +| RAG_source | The information source provided for the RAG pipeline | String | generated_facts | Not applicable | +| selected_answer | The answer extracted from the generation, keyed by its question category | Dict | {"Light_source": " UV"} | Details are given below | +| evidences | The list of evidences for the RAG pipeline | list | Details given below | Not applicable | + +The range for `selected_answer` with the corresponding `question_category`: +| question_category | Range(if applicable) | +| -------- | ------- | +| catalyst | Not applicable | +| co_catalyst | Not applicable | +| Light_source | 'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator', 'Do not Know' | +| Lamp | 'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator', 'Do not Know' | +| Reaction_medium | 'Liquid', 'Gas', 'Do not Know' | +| Reactor_type | 'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed', 'Do not Know' | +| Operation_mode | 'Batch', 'Continuous', 'Batch/Continuous', 'Do not Know' | + +The details for each entry of the `evidences` part of the `result` from the output json: + +| Category | Definition | DataType | Example | +| -------- | -------
| ------- | ------- | +| similairty_score | The similarity score between the query and the corresponding text provided to the embedding model | Float | 0.6205 | +| pdf_reference | The original text extracted from the paper | String | in the conventional focusing... | +| generated_facts | The generated facts based on the pdf_reference | String | Facts: 1. A homemade apparatus is used... | + +> **_NOTE:_** `generated_facts` only exists when the type of RAG pipeline is `fact`. + +## Example of the SolarRAG-CLI result + +*Example:* +```json + "paper_title": "Effect of silver doping on the TiO2 for photocatalytic reduction of CO2", + "DOI": "10.1016/j.apcatb.2010.02.030", + "generation_model": "llama3.2:3b", + "similarity_model": "nomic-embed-text", + "similarity_metric": "Cosine_Similarity", + "rag_type": "fact", + "result": [ + { + "question_category": "catalyst", + "query": "What is the chemical name of the catalyst used in the experiment", + "generation": "###catalyst: Titanium dioxide (TiO2)", + "RAG_source": "generated_facts", + "selected_answer": { + "catalyst": " Titanium dioxide (TiO2)" + }, + "evidences": [ + { + "similairty_score": 0.6646425724029541, + "pdf_reference": "methods were described in our previous publication 39.It is important to minimize the influence of transport phenomena during kinetic measurements.The elimination of CO 2 diffusion from the bulk of gas through the gas-liquid interface in a laboratory batch slurry reactor was accomplished by saturating the liquid with pure CO 2 before the reaction had been started 4,11.Catalyst loading of 1 g dm -3 was chosen to avoid concentration gradients in the bulk of stirred liquid with TiO 2 suspension due to the scattering effect of light caused by the high TiO 2 concentration 11,16,40,41.The", + "generated_facts": "Facts:\n1. The methods described in publication 39 were used for kinetic measurements.\n2. Kinetic measurements should minimize influence.\n3.
def get_text(data):
    """Build the RAG context string from the extracted paper sections.

    Args:
        data: Iterable of section dicts, each carrying a ``"title"`` and a
            ``"content"`` key.

    Returns:
        The selected sections concatenated in input order, each rendered as
        the title, a newline, the content and a trailing newline. Sections
        with any other title are skipped; an empty input yields ``""``.
    """
    # Both spellings of the results section are accepted: the extractor in
    # process.py emits "Results_and_discussion", while only the spaced form
    # was listed here before, which silently dropped that section.
    wanted_titles = {
        "Article_Title",
        "Abstract",
        "Experimental",
        "Results and discussion",
        "Results_and_discussion",
    }
    # Join once instead of growing a string with += inside the loop.
    return "".join(
        section["title"] + "\n" + section["content"] + "\n"
        for section in data
        if section["title"] in wanted_titles
    )
def get_title(data):
    """Return the paper title stored in the extracted sections.

    Args:
        data: Iterable of section dicts with ``"title"`` and ``"content"`` keys.

    Returns:
        The content of the first section titled ``"Article_Title"``, or the
        placeholder ``"None Given"`` when no such section exists.
    """
    for section in data:
        if section["title"] == "Article_Title":
            return section["content"]
    return "None Given"


def get_doi(data):
    """Return the DOI stored in the extracted sections.

    Args:
        data: Iterable of section dicts with ``"title"`` and ``"content"`` keys.

    Returns:
        The content of the first section titled ``"Doi"``, or the placeholder
        ``"None Given"`` when no such section exists.
    """
    for section in data:
        if section["title"] == "Doi":
            return section["content"]
    return "None Given"


def clean_response(gen, category):
    """Extract the answer part of a ``###<category>: <answer>`` generation.

    Args:
        gen: Raw text generated by the chat model.
        category: Question category used as the key of the returned dict.

    Returns:
        A one-entry dict mapping ``category`` to everything after the first
        colon of ``gen`` (leading whitespace is kept, as before). When ``gen``
        contains no colon the whole generation is used as the answer.
    """
    print(gen)
    # partition() never raises: answers that themselves contain a colon
    # (ratios, "10:1", ...) or generations without the "###category:" marker
    # used to crash the two-way unpacking of gen.split(":").
    _, separator, answer = gen.partition(":")
    if not separator:
        answer = gen
    res = {category: answer}
    print(res)
    return res


class SolarFact:
    """Fact-based RAG pipeline over a single paper.

    On construction the paper is loaded, split into chunks, and every chunk is
    turned into entities, relationship triples and finally fact sentences by
    the chat model. ``graphrag_pipeline`` then answers one query by ranking
    those facts against the query embedding.
    """

    # Closed answer sets offered to the model, keyed by question category.
    # Categories without an entry (e.g. catalyst, co_catalyst) are free-form.
    _CHOICES = {
        "Light_source": "'UV', 'Solar', 'UV-Vis', 'Monochromatic', 'Solar Simulator', 'Do not Know'",
        "Lamp": "'Fluorescent', 'Mercury', 'Halogen', 'Mercury-Xenon', 'LED', 'Tungsten', 'Xenon', 'Tungsten-Halide', 'Solar Simulator', 'Do not Know'",
        "Reaction_medium": "'Liquid', 'Gas', 'Do not Know'",
        "Reactor_type": "'Slurry', 'Fixed-bed', 'Optical Fiber', 'Monolithic', 'Membrane', 'Fluidised-bed', 'Do not Know'",
        "Operation_mode": "'Batch', 'Continuous', 'Batch/Continuous', 'Do not Know'",
    }

    def __init__(self, llm_id, embedding_id, input_file_path=str(), context_file_path=str()):
        """Load the paper and pre-compute chunks, entities, relations and facts.

        Args:
            llm_id: Ollama model id used for generation.
            embedding_id: Ollama model id used for text embeddings.
            input_file_path: Path to the paper PDF or to an extracted JSON file.
            context_file_path: Path the result JSON is written to by ``main``.
        """
        self.llm_id = llm_id
        self.embedding_id = embedding_id
        self.input_file_path = input_file_path
        self.context_file_path = context_file_path
        self._get_llm()
        self._get_documents()
        self.context_result = {
            "paper_title": self.paper_title,
            "DOI": self.doi,
            "generation_model": self.llm_id,
            "similarity_model": self.embedding_id,
            "similarity_metric": "Cosine_Similarity",
            # Result_Spec.md documents this field; it was missing from the output.
            "rag_type": "fact",
            "result": []
        }
        self.chunks, self.entities, self.relations, self.facts = self._prepare_pipeline()

    @staticmethod
    def _get_choices(category):
        """Return the choice list for ``category``, or ``None`` if free-form."""
        return SolarFact._CHOICES.get(category)

    def _get_llm(self):
        """Create the chat model and the embedding model from their ids."""
        self.llm = ChatOllama(model=self.llm_id, temperature=0)
        self.embeddings = OllamaEmbeddings(model=self.embedding_id)

    def _get_documents(self):
        """Load the paper sections and derive title, DOI and context text.

        Raises:
            ValueError: If the PDF extraction yields no data.
        """
        # Case-insensitive so that "paper.PDF" is not parsed as JSON.
        if self.input_file_path.lower().endswith("pdf"):
            data = process_paper(self.input_file_path)
        else:
            with open(self.input_file_path, "rb") as f:
                data = json.load(f)
        # NOTE(review): process_paper presumably returns None when extraction
        # fails -- confirm; fail with a clear message instead of a TypeError.
        if data is None:
            raise ValueError(f"No sections could be extracted from {self.input_file_path!r}")
        self.paper_title = get_title(data)
        self.doi = get_doi(data)
        self.documents = get_text(data)

    def _split_documents_into_chunks(self, chunk_size=600, overlap_size=100):
        """Split the context text into overlapping chunks.

        Args:
            chunk_size: Chunk size passed to the text splitter.
            overlap_size: Overlap between consecutive chunks.

        Returns:
            The list of chunk documents produced by the splitter.
        """
        documents = [Document(page_content=self.documents)]
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap_size)
        return text_splitter.split_documents(documents)

    def _extract_entities_from_chunks(self, chunks):
        """Ask the chat model for the entities of every chunk.

        Args:
            chunks: Chunk documents exposing ``page_content``.

        Returns:
            Dict mapping the chunk index to the model's entity listing.
        """
        entities = {}
        print("------Start extracting entities from chunks------")
        print(f"------Total chunk count: {len(chunks)}------")
        # The former `chunk in title_list` guard compared a chunk object with
        # plain strings and could never match, so every chunk is processed.
        for index, chunk in enumerate(chunks):
            response = self.llm.invoke(
                [
                    {"role": "system", "content": "Extract all the entities from the following text."},
                    {"role": "user", "content": chunk.page_content}
                ]
            )
            entities[index] = response.content
        print("------Entities extraction is done------")
        return entities

    def _extract_relationships_from_chunks_and_entities(self, chunks, entities):
        """Ask the chat model for relationship triples of every chunk.

        Args:
            chunks: Chunk documents exposing ``page_content``.
            entities: Dict of entity listings keyed by chunk index.

        Returns:
            Dict mapping the chunk index to the model's relationship listing.
        """
        relations = {}
        # Fixed copy-pasted progress message (it announced entity extraction).
        print("------Start extracting relationships from chunks------")
        print(f"------Total chunk count: {len(chunks)}------")
        for index, chunk in enumerate(chunks):
            response = self.llm.invoke(
                [
                    {"role": "system", "content": "Extract all the relationship from the following context and provided entities in the format of triples, (subject, predicate, object)"},
                    {"role": "user", "content": f"Context: {chunk.page_content}, Entities: {entities[index]}"}
                ]
            )
            relations[index] = response.content
        print("------Relationships extraction is done------")
        return relations

    def _generate_facts_from_relations(self, chunks, relations):
        """Turn the relationships of every chunk into simple fact sentences.

        Args:
            chunks: Chunk documents; only their positions are used.
            relations: Dict of relationship listings keyed by chunk index.

        Returns:
            Dict mapping the chunk index to the generated facts text.
        """
        facts = {}
        print("------Start generating factual sentences------")
        for index, _ in enumerate(chunks):
            response = self.llm.invoke(
                [
                    {"role": "system", "content": "Construct simple fact sentences by combining the following relationships after the \"Facts:\" word."},
                    {"role": "user", "content": f" Relations: {relations[index]}"}
                ]
            )
            facts[index] = response.content
        print("------Facts generation is done------")
        return facts

    def _cal_fact_cosine_similairty(self, facts, prompt, category):
        """Rank facts by cosine similarity to the query.

        Args:
            facts: Dict (or list) of fact texts; a list is keyed by position.
            prompt: Query text.
            category: Unused; kept for interface compatibility.

        Returns:
            Dict of ``key -> similarity`` ordered from most to least similar.
        """
        if isinstance(facts, list):
            facts = dict(enumerate(facts))
        if not facts:
            return {}
        # The query embedding does not depend on the fact: compute it once
        # instead of once per fact.
        query_embed = self.embeddings.embed_query(prompt)
        sim_dict = {}
        for key, fact in facts.items():
            fact_embed = self.embeddings.embed_query(fact)
            cosine_similarity = util.cos_sim(fact_embed, query_embed)
            sim_dict[key] = cosine_similarity[0].detach().item()
        return dict(sorted(sim_dict.items(), key=lambda item: item[1], reverse=True))

    def _generate_final_answer(self, sort_sim_dict, k, text, prompt, category, selection=None):
        """Answer the query from the ``k`` best-ranked texts.

        Args:
            sort_sim_dict: Similarity dict ordered best first.
            k: Number of top entries used as context.
            text: Mapping (or sequence) from key to context text.
            prompt: Query text.
            category: Question category, used in the answer marker.
            selection: Choice list to pick from, or ``None`` for free-form.

        Returns:
            Tuple of the selected keys and the model's answer text.
        """
        indexes = list(sort_sim_dict.keys())[:k]
        context = "".join(text[ind] for ind in indexes)
        if selection is None:
            messages = [
                {"role": "system", "content": f"Answer the following query based on the provided summary of facts. Please indicate the answer following the structure of \"###{category}:\" YOUR ANSWER"},
                {"role": "user", "content": f"Facts: {context} Query : {prompt}"}
            ]
        else:
            messages = [
                {"role": "system", "content": f"Answer the following query by selecting only one of the choices based on the provided summary of facts. Please indicate the answer following the structure of \"###{category}:\" YOUR ANSWER"},
                {"role": "user", "content": f"Query: {prompt} Choices: {selection} Facts: {context}"}
            ]
        final_response = self.llm.invoke(messages)
        return indexes, final_response.content

    def _prepare_pipeline(self):
        """Run chunking, entity, relationship and fact generation in order.

        Returns:
            Tuple ``(chunks, entities, relations, facts)``.
        """
        chunks = self._split_documents_into_chunks()
        entities = self._extract_entities_from_chunks(chunks)
        relations = self._extract_relationships_from_chunks_and_entities(chunks, entities)
        facts = self._generate_facts_from_relations(chunks, relations)
        return chunks, entities, relations, facts

    def graphrag_pipeline(self, k, prompt, category):
        """Answer one query with the fact-based RAG pipeline.

        Args:
            k: Number of top-ranked facts used as context and evidence.
            prompt: Query text.
            category: Question category; selects the choice list, if any.

        Returns:
            Result dict with the query, the generation, the selected answer
            and the evidences backing it.
        """
        print("---------------------running factrag pipeline---------------------")
        # Unknown categories are treated as free-form instead of failing with
        # an unbound `selection`.
        selection = self._get_choices(category)

        sim_dict = self._cal_fact_cosine_similairty(self.facts, prompt, category)
        indexes, final_response = self._generate_final_answer(sim_dict, k, self.facts, prompt, category, selection)

        evidences = [
            {
                "similairty_score": sim_dict[index],
                "pdf_reference": self.chunks[index].page_content,
                "generated_facts": self.facts[index]
            }
            for index in indexes
        ]
        return {
            "question_category": category,
            "query": prompt,
            "generation": final_response,
            "RAG_source": "generated_facts",
            "selected_answer": clean_response(final_response, category),
            "evidences": evidences
        }


def get_parser():
    """Build the command-line parser of the FactRAG script.

    Returns:
        An ``argparse.ArgumentParser`` whose options are all mandatory.
    """
    parser = argparse.ArgumentParser(description="Demo of LLM Pipeline")
    # required=True replaces default=True: a missing option used to become the
    # boolean True and fail much later with an unrelated error.
    parser.add_argument('--llm_id', type=str, required=True, help="the parameter of which LLM model from ollama to use")
    parser.add_argument('--embedding_id', type=str, required=True, help="the parameter of which embedding model from ollama to use")
    parser.add_argument('--input_file_path', type=str, required=True, help="path for input data, pdf file or extracted json file")
    parser.add_argument('--prompt_file', help='queries', type=str, required=True)
    parser.add_argument('--context_file_path', type=str, required=True, help="save context file")
    return parser


def main():
    """Run every query of the prompt file through the pipeline and save the result."""
    args_dict = vars(get_parser().parse_args())
    prompt_file = args_dict.pop("prompt_file")
    # Read the queries first so a bad prompt file fails before the expensive
    # per-chunk LLM preparation starts.
    with open(prompt_file, "rb") as f:
        query_data = json.load(f)
    factrag = SolarFact(**args_dict)
    context_result = {**factrag.context_result, "result": []}
    for key, value in query_data.items():
        context_result["result"].append(factrag.graphrag_pipeline(5, value, key))
    with open(factrag.context_file_path, "w") as f:
        json.dump(context_result, f)


# Guarded so that importing this module does not start the pipeline.
if __name__ == "__main__":
    main()
def process_fulltext_document(service, file):
    """Send one PDF to GROBID and return the TEI XML string it produces.

    Args:
        service: GROBID service name passed to ``client.process_pdf``.
        file: Path of the PDF to process.

    Returns:
        ``rsp[2]`` (the XML string) when the HTTP status ``rsp[1]`` is 200,
        otherwise ``None``. Failures are logged, never raised.
    """
    try:
        rsp = client.process_pdf(service, file,
                                 generateIDs=True,
                                 consolidate_header=True,
                                 consolidate_citations=False,
                                 include_raw_citations=False,
                                 include_raw_affiliations=False,
                                 tei_coordinates=True,
                                 segment_sentences=True)
        if rsp[1] == 200:  # HTTP status check
            return rsp[2]  # rsp[2] is the XML string
        # Logged (not printed) to match the error handling used elsewhere in this file.
        logging.error(f"Error: {rsp[1]}")
    except Exception as e:
        logging.error(f"Exception during PDF processing: {e}")
    return None


# Compiled once; clean_text is called for every paragraph of every paper.
_DISALLOWED_CHARS = re.compile(r'[^A-Za-z0-9\s,.?!;:()\-\'\"/]')
_WHITESPACE_RUN = re.compile(r'\s+')


def clean_text(text):
    """Normalise *text* to plain ASCII with basic punctuation only.

    Steps: NFKD-decompose and drop everything that is not ASCII, delete every
    character outside letters, digits, whitespace and ``,.?!;:()-'"/``, then
    collapse whitespace runs to one space and strip the ends.

    ``None`` or an empty string yields ``""`` (the original raised TypeError
    on ``None``).
    """
    if not text:
        return ""
    ascii_text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii')
    # The original also ran a non-ASCII pass and a "degree sign / newline" pass
    # here. Both were no-ops after the ASCII encode above, and newlines are
    # folded by the whitespace collapse below.
    ascii_text = _DISALLOWED_CHARS.sub('', ascii_text)
    return _WHITESPACE_RUN.sub(' ', ascii_text).strip()


def extract_sections_from_xml(xml_content):
    """Split a GROBID TEI document into the sections used by the pipeline.

    Args:
        xml_content: TEI XML as a string.

    Returns:
        A list of ``{"title": ..., "content": ...}`` dicts in the order Doi,
        Article_Title, Abstract, Experimental, Results_and_discussion,
        Conclusions, followed by Supporting_Information only when that
        section has content.
    """
    root = ET.fromstring(xml_content)
    ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

    def local_name(elem):
        """Return the element's tag without its ``{namespace}`` prefix."""
        tag = elem.tag
        return tag.rsplit('}', 1)[-1] if isinstance(tag, str) else ''

    # Title: the first <title> element anywhere in the document.
    title = ''
    title_element = root.find(".//tei:title", ns)
    if title_element is not None:
        title = ''.join(title_element.itertext()).strip()

    def extract_content_by_tag(tag_name):
        """Join the cleaned text of every TEI element named *tag_name*."""
        content = []
        path = f".//{{{ns['tei']}}}{tag_name}"
        for elem in root.findall(path):
            text = ''.join(elem.itertext())
            if text:
                content.append(clean_text(text.strip()))
        return " ".join(content)

    def extract_content_by_keywords(start_keywords, end_section):
        """Collect paragraphs from a matching heading up to the end heading.

        Capture starts at the first <head> whose text contains any of
        *start_keywords* (case-insensitive) and stops at the next <head>
        containing *end_section* that matches no start keyword.
        """
        wanted = [keyword.lower() for keyword in start_keywords]
        stop = end_section.lower()
        content = []
        capture = False
        for elem in root.iter():
            name = local_name(elem)
            if name == "head":
                # itertext() also covers headings whose text sits in child elements.
                heading = ''.join(elem.itertext()).lower()
                if any(keyword in heading for keyword in wanted):
                    capture = True
                elif capture and stop in heading:
                    # Stop only once capture has begun. The original broke on
                    # an end heading seen *before* the start heading, so a
                    # section located after it (e.g. Supporting Information
                    # after Conclusion) was never extracted.
                    break
            elif capture and name == "p":
                text = ''.join(elem.itertext()).strip()
                if text:
                    content.append(clean_text(text))
        return " ".join(content)

    def extract_doi():
        """Return the cleaned DOI from ``<idno type="DOI">``, or None."""
        doi_element = root.find(".//tei:idno[@type='DOI']", ns)
        # An empty <idno/> has text None; guard before calling .strip().
        if doi_element is None or not doi_element.text:
            return None
        return clean_text(doi_element.text.strip())

    doi = extract_doi()

    sections = [
        {"title": "Doi", "content": doi if doi else "Doi not found"},  # DOI comes first
        {"title": "Article_Title", "content": title},
        {"title": "Abstract", "content": extract_content_by_tag("abstract")},
        {"title": "Experimental", "content": extract_content_by_keywords(
            ["Experimental", "Experimental studies", "Experiments", "Experimental methods", "Methods"], "Results and discussion")},
        {"title": "Results_and_discussion", "content": extract_content_by_keywords(
            ["Results and discussion", "Result and discussion", "Results"], "Conclusion")},
        {"title": "Conclusions", "content": extract_content_by_keywords(
            ["Conclusion", "Conclusions"], "Conclusion")},
    ]

    # Optional section: appended only when something was found.
    supporting_content = extract_content_by_keywords(
        ["Supporting Information", "Supporting"], "Conclusion")
    if supporting_content:
        sections.append({"title": "Supporting_Information", "content": supporting_content})

    return sections
#!/bin/bash
# Run FactsRAG.py once for every regular file in the benchmark directory.
# An input whose result file already exists is skipped, so an interrupted
# batch can simply be restarted.

llm_id='llama3.2:3b'
embedding_id='nomic-embed-text'
prompt_file='/home/jovyan/GraphRAG/self_developed/prompt.json'

# Define the target directories (output_directory must end with a slash)
input_directory="/home/jovyan/GraphRAG/self_developed/10_bench"
output_directory="/home/jovyan/GraphRAG/self_developed/context/"


if [ ! -d "$input_directory" ]; then
    echo "input directory not found: $input_directory" >&2
    exit 1
fi

# Create the result directory up front so a long model run cannot fail at
# the final write.
mkdir -p "$output_directory"

for file in "$input_directory"/*; do
    if [ -f "$file" ]; then
        # Result index = the part of the file name between its first "_" and
        # the next "." or "_". The original cut field 4 of the whole path,
        # which only worked while the directory path held exactly two
        # underscores.
        name=${file##*/}
        rest=${name#*_}
        ind=${rest%%[._]*}
        output_file="${output_directory}result_${ind}.json"
        if [ -f "$output_file" ]; then
            echo "file exist"
        else
            if python FactsRAG.py --llm_id "$llm_id" --embedding_id "$embedding_id" --input_file_path "$file" --prompt_file "$prompt_file" --context_file_path "$output_file"; then
                echo "$output_file"
            else
                echo "FactsRAG.py failed for $file" >&2
            fi
        fi
    fi
# Printed once per directory entry, as in the original script.
echo "DONE"
done