Skip to content

Commit

Permalink
new epis
Browse files Browse the repository at this point in the history
  • Loading branch information
joofio committed Jan 31, 2024
1 parent b7e2b42 commit 52633d8
Show file tree
Hide file tree
Showing 70 changed files with 1,442 additions and 150 deletions.
Binary file added ePICreator/data/calchichew-d3f.xlsx
Binary file not shown.
Binary file added ePICreator/data/domperidone.xlsx
Binary file not shown.
7 changes: 6 additions & 1 deletion ePICreator/pdf-to-html/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from os.path import exists


LANGUAGE = "pt"
LANGUAGE = "uk"


if LANGUAGE == "dk":
Expand All @@ -18,13 +18,18 @@
from parser_fr import parse_html, cleanhtml, split_parts
if LANGUAGE == "en":
from parser_en import parse_html, cleanhtml, split_parts
if LANGUAGE == "uk":
from parser_uk import parse_html, cleanhtml, split_parts

# Define the path to the PDF file
pdf_path = "../../source-data/epi/karvea-epar-product-information_da.pdf"
pdf_path = "/Users/joaoalmeida/Downloads/biktarvy-epar-product-information_da.pdf"
pdf_path = "/Users/joaoalmeida/Downloads/humalog-epar-product-information_da.pdf"
pdf_path = "/Users/joaoalmeida/Downloads/dovato-epar-product-information_da.pdf"
pdf_path = "/Users/joaoalmeida/Downloads/dovato-epar-product-information_pt.pdf"
pdf_path = "/Users/joaoalmeida/Downloads/pil.12844.pdf"
pdf_path = "/Users/joaoalmeida/Downloads/pil.4177.pdf"


html_folder = "../temp_html/"
if not exists(html_folder):
Expand Down
58 changes: 58 additions & 0 deletions ePICreator/pdf-to-html/parser_uk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import re


# Helpers for trimming, cleaning and splitting converted patient-leaflet text.
def replace_unicode_character(input_string, character_to_replace, replacement_char):
    """Return *input_string* with every occurrence of a substring replaced.

    Delegates to ``input_string.replace``: each occurrence of
    ``character_to_replace`` becomes ``replacement_char``.
    """
    swapped = input_string.replace(character_to_replace, replacement_char)
    return swapped


def parse_html(html_content):
    """Cut the patient-leaflet section out of a converted document.

    The text is split on newlines and, on every line, the private-use
    code point U+F0A7 followed by a space is rewritten to ``"*"``
    (presumably a symbol-font bullet emitted by the PDF converter).

    The returned section starts at the last line containing
    ``"PATIENT INFORMATION LEAFLET"`` that appears at or before the end
    marker (line 0 if there is none).  It ends four lines after the first
    line containing the European Medicines Agency "Detailed information"
    sentence, i.e. ``end_index + 5`` exclusive.  When the end marker is
    absent the section runs to the end of the document.

    Args:
        html_content: The whole converted document as a single string.

    Returns:
        The selected lines joined with ``"\\n"``.

    Side effects:
        Prints the chosen start and stop indices (kept from the original
        implementation as a debugging aid).
    """
    start_marker = "PATIENT INFORMATION LEAFLET"  # ema and UK
    end_marker = "Detailed information on this medicine is available on the European Medicines Agency web site: "
    # Number of lines kept from the end-marker line onwards (marker included).
    lines_kept_from_end_marker = 5

    symbol_bullet = chr(61607) + " "
    # Clean every line up front.  The original stopped appending at the end
    # marker, so the lines that ``idx + 5`` was meant to keep were never
    # available to the final slice.
    # A bullet left alone at the end of a line ("• ") is not handled here:
    # after splitting on "\n" no line contains a newline, so it cannot be
    # matched per line.  cleanhtml() joins those bullets with the next line.
    lines = [
        line.replace(symbol_bullet, "*") for line in html_content.split("\n")
    ]

    startidx = 0
    end_marker_idx = None
    for idx, line in enumerate(lines):
        if start_marker in line:
            startidx = idx
        if end_marker in line:
            end_marker_idx = idx
            break

    if end_marker_idx is None:
        # No end marker: keep everything through the last line instead of
        # silently dropping it (the old default of -1 cut the final line).
        endidx = len(lines)
    else:
        endidx = end_marker_idx + lines_kept_from_end_marker

    print(startidx, endidx)
    return "\n".join(lines[startidx:endidx])


def cleanhtml(raw_html):
    """Remove page numbers and re-join lines broken after list markers.

    Applied in order:

    1. A line holding only a two- or three-digit number (optionally
       indented) is removed; these are treated as page numbers.  The
       pattern is anchored to whole lines so that digits ending a normal
       sentence line (``"Take 10 \\nmg"``, ``"2023 \\n"``) are no longer
       deleted.
    2. ``"*"`` directly followed by a newline becomes ``"* "``.
    3. A section number such as ``"1."`` followed by whitespace and a
       newline is joined to the next line.
    4. ``"-"`` and ``"•"`` followed by whitespace and a newline are joined
       to the next line; the bullet is normalised to ``"*"``.

    Args:
        raw_html: Text to clean.

    Returns:
        The cleaned text.
    """
    # Page numbers first, while they still sit on their own line; joining
    # "*\n" beforehand could pull a page number onto a bullet line.
    raw_html = re.sub(r"^[ \t]*\d{2,3}\s\n", "", raw_html, flags=re.MULTILINE)

    raw_html = re.sub(r"\*\n", "* ", raw_html)
    raw_html = re.sub(r"(\d\.)\s\n", r"\1 ", raw_html)
    # NOTE: a rule turning a standalone "o" into a bullet was disabled in the
    # original and is intentionally not applied here.
    raw_html = re.sub(r"-\s\n", "- ", raw_html)
    raw_html = re.sub(r"•\s\n", "* ", raw_html)

    return raw_html


# "What is in this leaflet" heading followed by its six numbered entries,
# one per line.  Compiled once instead of being spelled out three times.
_LEAFLET_CONTENTS_RE = re.compile(
    r"What is in this leaflet\S?\s{0,2}\n{1,2}\s{0,2}1. .+\n2. .+\n3. .+\n4. .+\n5. .+\n6. .+\n"
)


def split_parts(clean_content):
    """Split leaflet text around its "What is in this leaflet" contents table.

    Args:
        clean_content: Cleaned leaflet text.

    Returns:
        A ``(first_part, second_part, third_part)`` tuple: the text before
        the first contents table, the contents table itself, and all text
        after it.  Unlike the previous ``re.split(...)[1]`` approach, text
        following a second occurrence of the table is kept in
        ``third_part`` rather than dropped.

    Raises:
        IndexError: If no contents table is found.  The exception type is
            the one the original ``findall(...)[0]`` raised, so existing
            handlers keep working.
    """
    contents = _LEAFLET_CONTENTS_RE.search(clean_content)
    if contents is None:
        raise IndexError(
            "no 'What is in this leaflet' contents table with six numbered entries found"
        )

    first_part = clean_content[: contents.start()]
    second_part = contents.group(0)
    third_part = clean_content[contents.end():]
    return first_part, second_part, third_part
4 changes: 1 addition & 3 deletions input/fsh/examples/rawEPI/acmedrug-ema-automatic/Bundle.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,7 @@ RuleSet: 5aa8042f9b2bea12d814ccaf27f6d64eBundleRuleset
* entry[=].resource = mah-2aff7154a8d952c895e8a03849a25108


// composition-en-534e209eb0a3ff60437639cb2015316f
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-en-534e209eb0a3ff60437639cb2015316f/composition-en-534e209eb0a3ff60437639cb2015316f"
* entry[=].resource = composition-en-534e209eb0a3ff60437639cb2015316f



// PackagedProductDefinition
Expand Down
4 changes: 1 addition & 3 deletions input/fsh/examples/rawEPI/amox-ema-automatic/Bundle.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ RuleSet: 2d6b74860092f2b27363e070e7cc20eaBundleRuleset
* entry[=].resource = mah-0880223fada9f23e0f3760dee80fc7a3


// composition-en-b62cc095c7be2116a8a65157286376a3
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-en-b62cc095c7be2116a8a65157286376a3/composition-en-b62cc095c7be2116a8a65157286376a3"
* entry[=].resource = composition-en-b62cc095c7be2116a8a65157286376a3



// PackagedProductDefinition
Expand Down
4 changes: 1 addition & 3 deletions input/fsh/examples/rawEPI/aquilea-ema-automatic/Bundle.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
RuleSet: 49bbaf88ca82da246f516eabbf90d54eBundleRuleset


// composition-es-3b8585822d27945de47423af5de9824d
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-es-3b8585822d27945de47423af5de9824d/composition-es-3b8585822d27945de47423af5de9824d"
* entry[=].resource = composition-es-3b8585822d27945de47423af5de9824d




Expand Down
16 changes: 4 additions & 12 deletions input/fsh/examples/rawEPI/biktarvy-ema-automatic/Bundle.fsh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ RuleSet: 63b15a3bb9d18a00ecd0962bc011c765BundleRuleset
* entry[=].resource = cud-d79af6e660f456c6c7091803d6bd1a92


// composition-es-2a9c4333cfd126f2e7b73ad3cafd6949
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-es-2a9c4333cfd126f2e7b73ad3cafd6949/composition-es-2a9c4333cfd126f2e7b73ad3cafd6949"
* entry[=].resource = composition-es-2a9c4333cfd126f2e7b73ad3cafd6949




Expand Down Expand Up @@ -76,14 +74,10 @@ RuleSet: 63b15a3bb9d18a00ecd0962bc011c765BundleRuleset
* entry[=].resource = mra-1133a7374c78f1d40f41885344f06a5a


// composition-en-2a9c4333cfd126f2e7b73ad3cafd6949
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-en-2a9c4333cfd126f2e7b73ad3cafd6949/composition-en-2a9c4333cfd126f2e7b73ad3cafd6949"
* entry[=].resource = composition-en-2a9c4333cfd126f2e7b73ad3cafd6949



// composition-dk-2a9c4333cfd126f2e7b73ad3cafd6949
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-dk-2a9c4333cfd126f2e7b73ad3cafd6949/composition-dk-2a9c4333cfd126f2e7b73ad3cafd6949"
* entry[=].resource = composition-dk-2a9c4333cfd126f2e7b73ad3cafd6949



// PackagedProductDefinition
Expand All @@ -96,9 +90,7 @@ RuleSet: 63b15a3bb9d18a00ecd0962bc011c765BundleRuleset
* entry[=].resource = ppd-6d93344139a7793e7b9bd0f5e32b6c91


// composition-pt-2a9c4333cfd126f2e7b73ad3cafd6949
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/composition-pt-2a9c4333cfd126f2e7b73ad3cafd6949/composition-pt-2a9c4333cfd126f2e7b73ad3cafd6949"
* entry[=].resource = composition-pt-2a9c4333cfd126f2e7b73ad3cafd6949



// MedicinalProductDefinition
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

// Administrable product for Calcichew-D3 Forte: a chewable tablet for oral use in humans.
// Declared inline, so it is meant to be embedded in another resource rather than published on its own.
Instance: ap-a64c0ccb066e7fdf6ba2d86dcd8bda5e
InstanceOf: AdministrableProductDefinitionUvEpi
Title: "Administrable product Calcichew-D3 Forte"
Description: "Administrable product Calcichew-D3 Forte"
Usage: #inline

// NOTE(review): "gh-1" looks like a placeholder identifier value - confirm.
* identifier.system = "https://spor.ema.europa.eu/pmswi"
* identifier.value = "gh-1"

* status = #active

// formOf points at the medicinal product instance; dose form and unit use codes from the $spor-rms alias.
* formOf = Reference(mp04c5df8b1b688f62331c672bbd9ffa97)
* administrableDoseForm = $spor-rms#100000073684 "Chewable tablet"
* unitOfPresentation = $spor-rms#200000002152 "Tablet"



// The manufactured item this administrable product is produced from.
* producedFrom = Reference(mid-a64c0ccb066e7fdf6ba2d86dcd8bda5e)


* routeOfAdministration.code = $spor-rms#100000073619 "Oral use"

* routeOfAdministration.targetSpecies.code = $spor-rms#100000109093 "Human"
79 changes: 79 additions & 0 deletions input/fsh/examples/rawEPI/calchichew-d3f-ema-automatic/Bundle.fsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@



// Reusable list of Bundle entries for the Calcichew-D3 Forte product resources.
// Each pair of rules appends one entry: entry[+] starts a new entry and sets its fullUrl,
// entry[=] sets the resource of that same entry.
RuleSet: 9e23fbe86c1f883dff598a2d9fed332eBundleRuleset





// Ingredient
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/Ingredient/ingredient-for-calchichewd3f-calciumcarbonate"
* entry[=].resource = ingredient-for-calchichewd3f-calciumcarbonate


// Ingredient
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/Ingredient/ingredient-for-calchichewd3f-cholecalciferol"
* entry[=].resource = ingredient-for-calchichewd3f-cholecalciferol


// AdministrableProductDefinition
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/AdministrableProductDefinition/ap-a64c0ccb066e7fdf6ba2d86dcd8bda5e"
* entry[=].resource = ap-a64c0ccb066e7fdf6ba2d86dcd8bda5e


// RegulatedAuthorization
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/RegulatedAuthorization/authorizationa64c0ccb066e7fdf6ba2d86dcd8bda5e0"
* entry[=].resource = authorizationa64c0ccb066e7fdf6ba2d86dcd8bda5e0


// Organization
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/Organization/mah-d21dfe8c15312e8b8bf0b7774b510a9d"
* entry[=].resource = mah-d21dfe8c15312e8b8bf0b7774b510a9d


// PackagedProductDefinition
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/PackagedProductDefinition/ppd-a46ff5cb1a9568d59a1669fd597f58bf"
* entry[=].resource = ppd-a46ff5cb1a9568d59a1669fd597f58bf


// MedicinalProductDefinition
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/MedicinalProductDefinition/mp04c5df8b1b688f62331c672bbd9ffa97"
* entry[=].resource = mp04c5df8b1b688f62331c672bbd9ffa97





// ManufacturedItemDefinition
* entry[+].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/ManufacturedItemDefinition/mid-a64c0ccb066e7fdf6ba2d86dcd8bda5e"
* entry[=].resource = mid-a64c0ccb066e7fdf6ba2d86dcd8bda5e





// English package-leaflet document Bundle for Calcichew-D3 Forte.
// The Composition is placed explicitly at entry[0]; the inserted RuleSet then appends
// the product resources after it with entry[+].
Instance: bundlepackageleaflet-en-a64c0ccb066e7fdf6ba2d86dcd8bda5e
InstanceOf: BundleUvEpi
Title: "ePI document Bundle for Calcichew-D3 Forte Package Leaflet for language en"
Description: "ePI document Bundle for Calcichew-D3 Forte Package Leaflet for language en"
Usage: #example




// NOTE(review): "gh-1" looks like a placeholder identifier value - confirm.
* identifier.system = "https://www.gravitatehealth.eu/sid/doc"
* identifier.value = "gh-1"
* type = #document
* timestamp = "2023-06-27T10:09:22Z"
* language = #en





// Composition
// NOTE(review): the referenced Composition instance is not defined in the visible part of this change - confirm it exists.
* entry[0].fullUrl = "http://hl7.eu/fhir/ig/gravitate-health/Composition/composition-en-04c5df8b1b688f62331c672bbd9ffa97"
* entry[0].resource = composition-en-04c5df8b1b688f62331c672bbd9ffa97

* insert 9e23fbe86c1f883dff598a2d9fed332eBundleRuleset
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@


// Active ingredient entry: calcium carbonate in Calcichew-D3 Forte (inline resource).
Instance: ingredient-for-calchichewd3f-calciumcarbonate

InstanceOf: IngredientUvEpi
Title: "Ingredient-active Calcium Carbonate"
Description: "Calcium Carbonate"
Usage: #inline

// The identifier value is the same code used for the substance coding below.
* identifier.system = $ginas
* identifier.value = "H0G9379FGK"
* identifier.use = #official

* role = $spor-rms#100000072072 "Active"

* status = #active

* substance.code.concept.coding = $ginas#H0G9379FGK "Calcium Carbonate"


// NOTE(review): strength is stated as a presentation quantity - confirm the amount against the product information.
* substance.strength.presentationQuantity = 2500 'mg'

* substance.strength.basis = http://terminology.hl7.org/CodeSystem/v3-RoleClass#ACTIB "active ingredient - basis of strength"


// Reference to products
// Links this ingredient to the manufactured item, the medicinal product and the administrable product, in that order.
* for = Reference(mid-a64c0ccb066e7fdf6ba2d86dcd8bda5e)
* for[+] = Reference(mp04c5df8b1b688f62331c672bbd9ffa97)
* for[+] = Reference(ap-a64c0ccb066e7fdf6ba2d86dcd8bda5e)



// Manufacturer link is currently disabled.
//* manufacturer.manufacturer = Reference(mah-d21dfe8c15312e8b8bf0b7774b510a9d)


// Active ingredient entry: cholecalciferol in Calcichew-D3 Forte (inline resource).
Instance: ingredient-for-calchichewd3f-cholecalciferol

InstanceOf: IngredientUvEpi
Title: "Ingredient-active cholecalciferol"
Description: "cholecalciferol"
Usage: #inline

// The identifier value is the same code used for the substance coding below.
* identifier.system = $ginas
* identifier.value = "1C6V77QF41"
* identifier.use = #official

* role = $spor-rms#100000072072 "Active"

* status = #active

* substance.code.concept.coding = $ginas#1C6V77QF41 "cholecalciferol"


// NOTE(review): strength is stated as a presentation quantity in micrograms ('ug') - confirm the amount against the product information.
* substance.strength.presentationQuantity = 22 'ug'

* substance.strength.basis = http://terminology.hl7.org/CodeSystem/v3-RoleClass#ACTIB "active ingredient - basis of strength"


// Reference to products
// Links this ingredient to the manufactured item, the medicinal product and the administrable product, in that order.
* for = Reference(mid-a64c0ccb066e7fdf6ba2d86dcd8bda5e)
* for[+] = Reference(mp04c5df8b1b688f62331c672bbd9ffa97)
* for[+] = Reference(ap-a64c0ccb066e7fdf6ba2d86dcd8bda5e)



// Manufacturer link is currently disabled.
//* manufacturer.manufacturer = Reference(mah-d21dfe8c15312e8b8bf0b7774b510a9d)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

// Manufactured item for Calcichew-D3 Forte: the chewable tablet as manufactured (inline resource).
Instance: mid-a64c0ccb066e7fdf6ba2d86dcd8bda5e
InstanceOf: ManufacturedItemDefinitionUvEpi
Title: "Manufactured item Calcichew-D3 Forte"
Description: "Manufactured item Calcichew-D3 Forte"
Usage: #inline

// NOTE(review): "gh-1" looks like a placeholder identifier value - confirm.
* identifier.system = "https://spor.ema.europa.eu/pmswi"
* identifier.value = "gh-1"
* identifier.use = #official

* status = #active
// NOTE(review): the dose form uses a literal system URL while unitOfPresentation below uses the
// $spor-rms alias - confirm both resolve to the same code system.
* manufacturedDoseForm = https://spor.ema.europa.eu/rmswi/#100000073684 "Chewable tablet"

* unitOfPresentation = $spor-rms#200000002152 "Tablet"





// Manufacturer link is currently disabled.
//* manufacturer = Reference(mah-d21dfe8c15312e8b8bf0b7774b510a9d)
Loading

0 comments on commit 52633d8

Please sign in to comment.