Skip to content

Commit

Permalink
Merge branch 'master' into mvp2
Browse files Browse the repository at this point in the history
  • Loading branch information
joofio committed Jan 25, 2024
2 parents 3198ab5 + 8416eec commit 4d20fc1
Show file tree
Hide file tree
Showing 195 changed files with 2,388 additions and 977 deletions.
Binary file modified ePICreator/Dovato.xlsx
Binary file not shown.
Binary file modified ePICreator/HIPÉRICO ARKOPHARMA.xlsx
Binary file not shown.
Binary file modified ePICreator/Humalog.xlsx
Binary file not shown.
Binary file modified ePICreator/Mirtazapine.xlsx
Binary file not shown.
Binary file modified ePICreator/Paracetamol.xlsx
Binary file not shown.
Binary file modified ePICreator/Pentasa.xlsx
Binary file not shown.
Binary file modified ePICreator/Remsima.xlsx
Binary file not shown.
Binary file modified ePICreator/acmeDrug.xlsx
Binary file not shown.
Binary file added ePICreator/amox.xlsx
Binary file not shown.
Binary file modified ePICreator/cinitrapide.xlsx
Binary file not shown.
78 changes: 78 additions & 0 deletions ePICreator/pdf-to-html/test-pdf-to-html-en.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import fitz # PyMuPDF
import re


# Input PDF whose page text is extracted further down in this script.
pdf_path = "../source-data/epi/pil.10637.pdf"
# Output file the cleaned text is written to (a .md file, despite the name).
html_file_path = "./pil.10637.md"


def replace_unicode_character(input_string, character_to_replace, replacement_char):
    """Return ``input_string`` with each ``character_to_replace`` swapped out.

    Every occurrence of ``character_to_replace`` (which may be longer than a
    single character) is replaced by ``replacement_char``.
    """
    substituted = input_string.replace(character_to_replace, replacement_char)
    return substituted


def cleanhtml(raw_html):
    """Tidy the extracted leaflet text and turn section titles into headings.

    Two ordered passes are applied:

    1. Regex rules: a "*" at the end of a line is joined to the next line,
       lines ending in a 2-3 digit number plus one whitespace character are
       dropped (presumably page numbers), a "<digit>." left alone at the end
       of a line is joined to the next line, and a lone "o" between
       whitespace becomes "* " (presumably a sub-bullet glyph).
    2. Literal rules: the numbered English section titles are rewritten as
       "## " headings without their number.

    Returns the cleaned text.
    """
    # Order matters: each rule runs on the output of the previous one.
    regex_rules = (
        (r"\*\n", "* "),
        (r"\d{2,3}\s\n", ""),
        (r"(\d\.)\s\n", r"\1 "),
        (r"\so\s", "* "),
    )
    heading_rules = (
        ("4. Possible side effects \n ", "## Possible side effects\n"),
        ("\n \n5. How to store ", " \n## How to store "),
        (
            "\n \n6. Contents of the pack and other information",
            " \n## Contents of the pack and other information",
        ),
        ("\n \n3. How to take ", " \n## How to take "),
        (
            "\n \n2. What you need to know before you take ",
            " \n## What you need to know before you take ",
        ),
        ("\n \n1. What ", " \n## What "),
    )

    cleaned = raw_html
    for pattern, substitute in regex_rules:
        cleaned = re.sub(pattern, substitute, cleaned)
    for needle, heading in heading_rules:
        cleaned = cleaned.replace(needle, heading)
    return cleaned


def parse_html(html_content):
    """Return the package-leaflet part of the extracted PDF text.

    The text is processed line by line. In every line the private-use bullet
    glyph ``chr(61607)`` followed by a space is replaced with "*". The
    returned text starts at the last line (before the end marker) containing
    one of the start markers and ends with the line containing the end
    marker.

    If no start marker is found the text is returned from its first line.
    If no end marker is found the slice ends at -1, i.e. the final line is
    dropped (unchanged from the previous behaviour).

    Args:
        html_content: Full text extracted from the PDF.

    Returns:
        The selected lines joined with newlines.
    """
    start_markers = ("B. PACKAGE LEAFLET", "Package Leaflet (PIL)")  # ema and UK
    end_marker = (
        "Detailed information on this medicine is available on the European Medicines Agency web site: "
    )

    new_html_content = []
    # Default to the beginning so a document without a start marker still
    # yields output instead of failing on an unbound variable.
    startidx = 0
    endidx = -1
    for idx, line in enumerate(html_content.split("\n")):
        line = line.replace(chr(61607) + " ", "*")
        new_html_content.append(line)
        # Each marker needs its own membership test: a bare non-empty string
        # on the left of ``or`` is always truthy and would match every line.
        if any(marker in line for marker in start_markers):
            print(line)
            startidx = idx
        if end_marker in line:
            print(line)
            # NOTE(review): the +5 presumably aims to keep a few lines after
            # the marker, but the loop stops here, so the slice is clamped to
            # the marker line itself - confirm the intent.
            endidx = idx + 5
            break
    print(startidx, endidx)
    return "\n".join(new_html_content[startidx:endidx])


# Extract the plain text of every page. The context manager closes the
# document even if extraction fails part-way through.
with fitz.open(pdf_path) as doc:
    html_content = "".join(page.get_text() for page in doc)

# Keep only the package-leaflet section, then tidy it and mark up headings.
html_content = parse_html(html_content)
html_content = cleanhtml(html_content)

# Save the result. The encoding is explicit because the platform default may
# be unable to encode non-ASCII characters present in the extracted text.
with open(html_file_path, "w", encoding="utf-8") as file:
    file.write(html_content)
78 changes: 78 additions & 0 deletions ePICreator/pdf-to-html/test-pdf-to-html-es.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import fitz # PyMuPDF
import re


# Input PDF whose page text is extracted further down in this script.
pdf_path = "../source-data/epi/xenical-epar-product-information_es.pdf"
# Output file the cleaned text is written to (a .md file, despite the name).
html_file_path = "../source-data/epi/xenical-epar-product-information_es.md"


def replace_unicode_character(input_string, character_to_replace, replacement_char):
    """Return ``input_string`` with each ``character_to_replace`` swapped out.

    Every occurrence of ``character_to_replace`` (which may be longer than a
    single character) is replaced by ``replacement_char``.
    """
    substituted = input_string.replace(character_to_replace, replacement_char)
    return substituted


def cleanhtml(raw_html):
    """Tidy the extracted leaflet text and turn section titles into headings.

    Two ordered passes are applied:

    1. Regex rules: a "*" at the end of a line is joined to the next line,
       lines ending in a 2-3 digit number plus one whitespace character are
       dropped (presumably page numbers), and a "<digit>." left alone at the
       end of a line is joined to the next line.
    2. Literal rules: numbered section titles are rewritten as "## " headings
       without their number.

    Returns the cleaned text.
    """
    # Order matters: each rule runs on the output of the previous one.
    # A rule turning a lone "o" between whitespace into "* " is deliberately
    # not applied in this script (presumably because "o" is a Spanish word).
    regex_rules = (
        (r"\*\n", "* "),
        (r"\d{2,3}\s\n", ""),
        (r"(\d\.)\s\n", r"\1 "),
    )
    # NOTE(review): these titles are the English ones; a Spanish leaflet
    # presumably never contains them - confirm whether Spanish titles are
    # needed here.
    heading_rules = (
        ("4. Possible side effects \n ", "## Possible side effects\n"),
        ("\n \n5. How to store ", " \n## How to store "),
        (
            "\n \n6. Contents of the pack and other information",
            " \n## Contents of the pack and other information",
        ),
        ("\n \n3. How to take ", " \n## How to take "),
        (
            "\n \n2. What you need to know before you take ",
            " \n## What you need to know before you take ",
        ),
        ("\n \n1. What ", " \n## What "),
    )

    cleaned = raw_html
    for pattern, substitute in regex_rules:
        cleaned = re.sub(pattern, substitute, cleaned)
    for needle, heading in heading_rules:
        cleaned = cleaned.replace(needle, heading)
    return cleaned


def parse_html(html_content):
    """Return the package-leaflet ("prospecto") part of the extracted text.

    The text is processed line by line. In every line a bullet character
    followed by a space is replaced with "*". The returned text starts at the
    last line (before the end marker) containing "B. PROSPECTO" and ends with
    the line containing the end marker.

    If no start marker is found the text is returned from its first line.
    If no end marker is found the slice ends at -1, i.e. the final line is
    dropped (unchanged from the previous behaviour).

    Args:
        html_content: Full text extracted from the PDF.

    Returns:
        The selected lines joined with newlines.
    """
    start_marker = "B. PROSPECTO"
    end_marker = (
        "La información detallada de este medicamento está disponible en la página web de la Agencia Europea de Medicamentos:"
    )

    new_html_content = []
    # Default to the beginning so a document without the start marker still
    # yields output instead of raising UnboundLocalError.
    startidx = 0
    endidx = -1
    for idx, line in enumerate(html_content.split("\n")):
        line = line.replace("• ", "*")
        new_html_content.append(line)
        if start_marker in line:
            print(line)
            startidx = idx
        if end_marker in line:
            print(line)
            # NOTE(review): the +5 presumably aims to keep a few lines after
            # the marker, but the loop stops here, so the slice is clamped to
            # the marker line itself - confirm the intent.
            endidx = idx + 5
            break
    print(startidx, endidx)
    return "\n".join(new_html_content[startidx:endidx])


# Extract the plain text of every page. The context manager closes the
# document even if extraction fails part-way through.
with fitz.open(pdf_path) as doc:
    html_content = "".join(page.get_text() for page in doc)

# Keep only the package-leaflet section, then tidy it and mark up headings.
html_content = parse_html(html_content)
html_content = cleanhtml(html_content)

# Save the result. The encoding is explicit because the platform default may
# be unable to encode the accented characters of the Spanish text.
with open(html_file_path, "w", encoding="utf-8") as file:
    file.write(html_content)
Binary file modified ePICreator/skilarence.xlsx
Binary file not shown.
Binary file modified ePICreator/tegretol.xlsx
Binary file not shown.
17 changes: 15 additions & 2 deletions ePICreator/templates/Bundle.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RuleSet: {{data["dictionary"]["MajorName"] | lower | regex_replace('[^A-Za-z0-9]
{%- for refs in value %}


{% if key != "Composition" and key !="Bundle" -%}
{% if key != "Composition" and key !="Bundle" and key !="List" -%}
// {{key}}
{% if "Substance" not in key -%}

Expand All @@ -38,8 +38,21 @@ Title: "ePI document Bundle for {{data["dictionary"]["productname"]}} Package Le
Description: "ePI document Bundle for {{data["dictionary"]["productname"]}} Package Leaflet for language {{row["language"]}}"
Usage: #example

{% if row["identifier_value"]!="nan" %}
{% set ns = namespace() %}
{% set ns.one =row["language"] %}
{% set ns.two = data["dictionary"]["productname"]| regex_replace('[^A-Za-z0-9]+', '') %}
{% set ns.name_to_has= ns.one ~ ns.two %}



{% if row["identifier_value"]=="nan" %}
* identifier[+].system = "{{row['identifier_system']}}"
* identifier.value = "{{ns.name_to_has| create_hash_id}}"

{% elif row["identifier_value"]=="xx" %}
* identifier[+].system = "{{row['identifier_system']}}"
* identifier.value = "{{ns.name_to_has| create_hash_id}}"
{% else %}
* identifier.system = "{{row['identifier_system']}}"
* identifier.value = "{{row["identifier_value"]|trim}}"
{% endif %}
Expand Down
59 changes: 40 additions & 19 deletions ePICreator/templates/Composition.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,29 @@
{% if row["skip"] not in ['y', 'Y', 'x', 'X'] %}

{% set ns = namespace() %}
{% if row["language"] == "es"%}
{% set ns.title = "B. PROSPECTO" %}
{% set ns.user = "PROSPECTO: INFORMACIÓN PARA EL USUARIO" %}
{% set ns.leaflet = "Contenido del prospecto" %}
{% set ns.h1 = "1. Qué es "~row['name']~" y para qué se utiliza"%}

{% set ns.h2 = "2. Qué necesita saber antes de empezar a tomar " ~row['name'] %}
{% set ns.h3 = "3. Cómo tomar "~row['name'] %}
{% set ns.h4 = "4. Posibles efectos adversos" %}
{% set ns.h5 = "5. CONSERVACIÓN DE "~row['name'] %}
{% set ns.h6 = "6. Contenido del envase e información adicional" %}

{% else %}
{% set ns.title = "B. Package Leaflet" %}
{% set ns.user = "Package leaflet: Information for the user" %}
{% set ns.leaflet = "What is in this leaflet" %}
{% set ns.h1 = "1. What "~row['name']~" is and what it is used for" %}
{% set ns.h2 = "2. What you need to know before you take "~row['name'] %}
{% set ns.h3 = "3. How to take "~row['name'] %}
{% set ns.h4 = "4. Possible side effects" %}
{% set ns.h5 = "5. How to store "~row['name'] %}
{% set ns.h6 = "6. Contents of the pack and other information" %}
{% endif %}

Instance: composition-{{row["language"]}}-{{data["dictionary"]["productname"]| regex_replace('[^A-Za-z0-9]+', '')| create_hash_id}}
InstanceOf: CompositionUvEpi
Expand All @@ -13,7 +35,6 @@ Usage: #example
* identifier.system = "{{row['identifier_system']}}"

{% if row["identifier"]|string == "nan" %}
{% set ns = namespace() %}
{% set ns.one = "Composition" %}
{% set ns.two = data["dictionary"]["productname"]| regex_replace('[^A-Za-z0-9]+', '') %}
{% set ns.name_to_has= ns.one ~ ns.two %}
Expand Down Expand Up @@ -58,9 +79,9 @@ Usage: #example


* section[+].
* title = "B. Package Leaflet"
* title = "{{ns.title}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "B. PACKAGE LEAFLET"
* code.text = "{{ns.title}}"
* text.status = #additional
{% if row["package_leaflet"]|string == "nan" %}

Expand All @@ -76,9 +97,9 @@ Usage: #example


* section[=].section[+]
* title = "Package leaflet: Information for the user"
* title = "{{ns.user}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "Package leaflet: Information for the user"
* code.text = "{{ns.user}}"
* text.status = #additional
{% if row["information_user"]|string == "nan" %}

Expand All @@ -94,9 +115,9 @@ Usage: #example


* section[=].section[+]
* title = "What is in this leaflet"
* title = "{{ns.leaflet}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "What is in this leaflet"
* code.text = "{{ns.leaflet}}"
* text.status = #additional
{% if row["what_in_leaflet"]|string == "nan" %}

Expand All @@ -111,9 +132,9 @@ Usage: #example


* section[=].section[+]
* title = "1. What {{row['name']}} is and what it is used for"
* title = "{{ns.h1}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "1. What {{row['name']}} is and what it is used for"
* code.text = "{{ns.h1}}"
* text.status = #additional
{% if row["what_product_is"]|string == "nan" %}

Expand All @@ -130,9 +151,9 @@ Usage: #example


* section[=].section[+]
* title = "2. What you need to know before you take {{row['name']}}"
* title = "{{ns.h2}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "2. What you need to know before you take {{row['name']}}"
* code.text = "{{ns.h2}}"
* text.status = #additional
{% if row["before_take"]|string == "nan" %}

Expand All @@ -146,9 +167,9 @@ Usage: #example
{%- endif %}

* section[=].section[+]
* title = "3. How to take {{row['name']}}"
* title = "{{ns.h3}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "3. How to take {{row['name']}}"
* code.text = "{{ns.h3}}"
* text.status = #additional
{% if row["how_to_take"]|string == "nan" %}

Expand All @@ -163,9 +184,9 @@ Usage: #example


* section[=].section[+]
* title = "4. Possible side effects"
* title = "{{ns.h4}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "4. Possible side effects"
* code.text = "{{ns.h4}}"
* text.status = #additional
{% if row["side_effects"]|string == "nan" %}

Expand All @@ -179,9 +200,9 @@ Usage: #example
{%- endif %}

* section[=].section[+]
* title = "5. How to store {{row['name']}}"
* title = "{{ns.h5}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "5. How to store {{row['name']}}"
* code.text = "{{ns.h5}}"
* text.status = #additional
{% if row["how_to_store"]|string == "nan" %}

Expand All @@ -196,9 +217,9 @@ Usage: #example


* section[=].section[+]
* title = "6. Contents of the pack and other information"
* title = "{{ns.h6}}"
* code = https://spor.ema.europa.eu/rmswi/#100000155538
* code.text = "6. Contents of the pack and other information"
* code.text = "{{ns.h6}}"
* text.status = #additional
{% if row["other_info"]|string == "nan" %}

Expand Down
3 changes: 2 additions & 1 deletion ePICreator/templates/List.fsh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

{% if data["data"]|length> 1%}



Expand Down Expand Up @@ -53,3 +53,4 @@ Description: "List of {{data["dictionary"]["MajorName"]}} Package Leaflets"
{%- endif %}
{%- endfor %}

{%- endif %}
3 changes: 3 additions & 0 deletions ePICreator/templates/RegulatedAuthorization.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ Usage: #example
* subject = Reference({{data["references"]["MedicinalProductDefinition"][0][0]}})
{% elif row["reference"] == "PackagedProduct"%}
* subject = Reference({{data["references"]["PackagedProductDefinition"][index][0]}})
{% else %}
* subject = Reference({{data["references"]["MedicinalProductDefinition"][0][0]}})

{% endif %}

{% endif %}
Expand Down
Binary file modified ePICreator/trastuzumab.xlsx
Binary file not shown.
Binary file modified ePICreator/xenical.xlsx
Binary file not shown.
5 changes: 0 additions & 5 deletions input/fsh/examples/List.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ Description: "List of enbrel smpc Package Leaflets"

* code = $example-pi-list-types#00000021213 "ePI Master List"



* subject = Reference(mp25bbfa948f4a0b224f9baa1fe481efa8)
* subject.extension[0].url = "http://ema.europa.eu/fhir/extension/medicine-name"
* subject.extension[=].valueCoding = $100000000005#Enbrel "Enbrel"
Expand Down Expand Up @@ -51,9 +49,6 @@ Description: "List of enbrel smpc Package Leaflets"






Instance: listDovato
InstanceOf: List
Usage: #example
Expand Down
Loading

0 comments on commit 4d20fc1

Please sign in to comment.