From b02fc3c14fce815e71f3d7a85aac0d9a50c61010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibault=20Cl=C3=A9rice?= Date: Mon, 26 Aug 2024 09:54:26 +0200 Subject: [PATCH] citationTrees correctly implemented --- dapitains/app/database.py | 38 ++++++++++++++++++++++++++++------- dapitains/app/ingest.py | 23 ++------------------- dapitains/metadata/classes.py | 4 ++-- tests/test_catalog.py | 8 ++++---- 4 files changed, 39 insertions(+), 34 deletions(-) diff --git a/dapitains/app/database.py b/dapitains/app/database.py index 954c8c6..06a54a5 100644 --- a/dapitains/app/database.py +++ b/dapitains/app/database.py @@ -1,3 +1,5 @@ +from collections import defaultdict + try: from flask_sqlalchemy import SQLAlchemy from sqlalchemy.ext.mutable import MutableDict, Mutable @@ -10,8 +12,6 @@ from typing import Optional, Dict, Any import dapitains.metadata.classes as abstracts -from dapitains.metadata.xml_parser import Catalog -from dapitains.tei.document import Document import json @@ -59,6 +59,7 @@ class Collection(db.Model): dublin_core = db.Column(JSONEncoded, nullable=True) extensions = db.Column(JSONEncoded, nullable=True) citeStructure = db.Column(JSONEncoded, nullable=True) + default_tree = db.Column(db.String, nullable=True) # One-to-one relationship with Navigation navigation = db.relationship('Navigation', uselist=False, backref='collection', lazy=True) @@ -92,8 +93,16 @@ def json(self, inject: Optional[Dict[str, Any]] = None): } if self.description: data["description"] = self.description + if self.resource: + data["citationTrees"] = [] if self.citeStructure: - data["citeStructure"] = self.citeStructure + data["citationTrees"] = [self.citeStructure[self.default_tree]] + if len(self.citeStructure) >= 1: + data["citationTrees"][0]["identifier"] = self.default_tree + for key in self.citeStructure: + if key != self.default_tree: + data["citationTrees"].append(self.citeStructure[key]) + self.citeStructure[key]["identifier"] = key if self.dublin_core: # ToDo: Fix the way it's presented to adapt to dts view data["dublinCore"] = self.dublin_core if self.extensions: @@ -103,16 +112,32 @@ def json(self, inject: Optional[Dict[str, Any]] = None): @classmethod def from_class(cls, obj: abstracts.Collection) -> "Collection": - return cls( + dublin_core = defaultdict(list) + for dublin in obj.dublin_core: + if dublin.language: + dublin_core[dublin.term].append({"lang": dublin.language, "value": dublin.value}) + else: + dublin_core[dublin.term].append(dublin.value) + + extensions = defaultdict(list) + for exte in obj.extensions: + if exte.language: + extensions[exte.term].append({"lang": exte.language, "value": exte.value}) + else: + extensions[exte.term].append(exte.value) + + + obj = cls( identifier=obj.identifier, title=obj.title, description=obj.description, resource=obj.resource, filepath=obj.filepath, # We are dumping because it's not read or accessible - dublin_core=[dub.json() for dub in obj.dublin_core], - extensions=[ext.json() for ext in obj.extension] + dublin_core=dublin_core, #[dub.json() for dub in obj.dublin_core], + extensions=extensions, # [ext.json() for ext in obj.extension] ) + return obj class Navigation(db.Model): @@ -120,7 +145,6 @@ class Navigation(db.Model): id = db.Column(db.Integer, primary_key=True, autoincrement=True, nullable=False) collection_id = db.Column(db.Integer, db.ForeignKey('collections.id'), nullable=False, unique=True) - # default_tree = db.Column(db.String, nullable=True) # JSON fields stored as TEXT paths = db.Column(JSONEncoded, nullable=False, default={}) diff --git a/dapitains/app/ingest.py b/dapitains/app/ingest.py index 929593f..4c73cf1 100644 --- a/dapitains/app/ingest.py +++ b/dapitains/app/ingest.py @@ -27,6 +27,8 @@ def store_single(catalog: Catalog, keys: Optional[Dict[str, int]]): key: value.structure.json() for key, value in doc.citeStructure.items() } + coll_db.default_tree = doc.default_tree + db.session.add(coll_db) db.session.commit() for parent, child in catalog.relationships: @@ -42,24 +44,3 @@ def store_catalog(*catalogs): keys = {} for catalog in catalogs: store_single(catalog, keys) - - -if __name__ == "__main__": - import flask - import os - from dapitains.metadata.xml_parser import parse - app = flask.Flask(__name__) - - basedir = os.path.abspath(os.path.dirname(__file__)) - db_path = os.path.join(basedir, 'app.db') - app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{db_path}' - app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False - - db.init_app(app) - with app.app_context(): - db.drop_all() - db.create_all() - - catalog, _ = parse("/home/thibault/dev/MyDapytains/tests/catalog/example-collection.xml") - - store_catalog(catalog) diff --git a/dapitains/metadata/classes.py b/dapitains/metadata/classes.py index 09de602..90bec08 100644 --- a/dapitains/metadata/classes.py +++ b/dapitains/metadata/classes.py @@ -33,7 +33,7 @@ class Collection: title: str description: Optional[str] = None dublin_core: List[DublinCore] = field(default_factory=list) - extension: List[Extension] = field(default_factory=list) + extensions: List[Extension] = field(default_factory=list) resource: bool = False filepath: Optional[str] = None @@ -43,7 +43,7 @@ def json(self): "title": self.title, "description": self.description, "dublin_core": self.dublin_core, - "extension": self.extension, + "extension": self.extensions, "resource": self.resource, "filepath": self.filepath } diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 145ac77..6602b2f 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -16,7 +16,7 @@ def test_ingestion(): title='A collection', description=None, dublin_core=[ DublinCore(term='abstract', value='This is a perfect example of an absract.', language=None), - DublinCore(term='abstract', value='Et je peux traduire en français', language='fr')], extension=[], + DublinCore(term='abstract', value='Et je peux traduire en français', language='fr')], extensions=[], resource=False, filepath=None ), @@ -29,7 +29,7 @@ def test_ingestion(): DublinCore(term='subject', value='History', language=None), DublinCore(term='date', value='2023-08-24', language=None) ], - extension=[], + extensions=[], resource=False, filepath=None ), @@ -41,7 +41,7 @@ def test_ingestion(): DublinCore(term='subject', value='World War II', language=None), DublinCore(term='language', value='en', language=None) ], - extension=[], resource=True, + extensions=[], resource=True, filepath=os.path.abspath(f"{local_dir}/tei/multiple_tree.xml") ), "https://foo.bar/text": Collection( @@ -51,7 +51,7 @@ def test_ingestion(): dublin_core=[ DublinCore(term='title', value='A simple resource', language=None) ], - extension=[], + extensions=[], resource=True, filepath=os.path.abspath(f"{local_dir}/tei/base_tei.xml") )