From bd6985a65411dae42c1d61accc432a1f18879220 Mon Sep 17 00:00:00 2001
From: Damien Accorsi <damien.accorsi@algoo.fr>
Date: Sun, 15 Dec 2024 23:39:02 +0100
Subject: [PATCH] allow substitutions in metadata in order to avoid repetition

---
 jssg/models.py | 221 +++++++++++++++++++++++++++++++------------------
 1 file changed, 139 insertions(+), 82 deletions(-)

diff --git a/jssg/models.py b/jssg/models.py
index 3aac427..32c7823 100644
--- a/jssg/models.py
+++ b/jssg/models.py
@@ -31,12 +31,14 @@
 from math import ceil
 
 
-
-class EmptyLine(Exception) :
+class EmptyLine(Exception):
     pass
-class CommentLine(Exception) :
+
+
+class CommentLine(Exception):
     pass
 
+
 class Document:
     """A document.
 
@@ -100,8 +102,8 @@ def content(self) -> str:
         # TODO - D.A. - 2024-09-09: implement in jinja2.py module
         # the possibility to import extra markdown extensions like 'fenced-code-blocks', etc
 
-
-        if "template_engine" in self.metadata.keys() and self.metadata["template_engine"] == "django" :
+        if "template_engine" in self.metadata.keys() and self.metadata[
+            "template_engine"] == "django":
             return Template(self.body).render(
                 Context(
                     {
@@ -112,7 +114,7 @@ def content(self) -> str:
                     }
                 )
             )
-        else :
+        else:
             # TODO - D.A. - 2024-09-09 - Log markdown extensions for user usage
             # for mdext in engines["jinja2"].env.markdowner.registeredExtensions:
             #    print("Extension: ", mdext)
@@ -121,10 +123,45 @@ def content(self) -> str:
                     "posts": sorted(
                         Post.load_glob(), key=lambda p: p.timestamp, reverse=True
                     ),
-                    "data":self.data
+                    "data": self.data
                 }
             )
 
+    @classmethod
+    def process_metadata_substitutions(cls, metadata: dict) -> dict:
+        """Return a copy of ``metadata`` with ``((name))`` placeholders expanded.
+
+        Once metadata loading is finished, every string value is scanned for
+        ``((key))`` placeholders; each one naming an existing string metadata is
+        replaced by that metadata's original value. This avoids duplication, e.g.
+        ``lang`` set to ``((dir))``, or a global ``website_url`` reused elsewhere.
+
+        The expansion is NOT recursive: placeholders inside a substituted value are
+        left as-is. The "path" entry and non-string values are copied untouched.
+
+        :param metadata: metadata names mapped to their values; never modified.
+        :return: a deep copy of ``metadata`` with the placeholders expanded.
+        """
+        import re
+        from copy import deepcopy
+        substitued_metadata = deepcopy(metadata)
+
+        # Only string metadata (except the special "path") take part in substitution.
+        texts = {k: v for k, v in metadata.items() if k != "path" and isinstance(v, str)}
+        markers = {f"(({k}))": v for k, v in texts.items()}
+        if not markers:
+            return substitued_metadata  # an empty alternation would match everywhere
+        # One re.sub pass over literal markers never rescans inserted text (non-recursive).
+        finder = re.compile("|".join(map(re.escape, markers)))
+        for key, value in texts.items():
+            print(f"processing substitution for {key} -> {value}")
+            new_value = finder.sub(lambda found: markers[found.group(0)], value)
+            if new_value != value:
+                substitued_metadata[key] = new_value
+                print(
+                    f"Replace metadata[{key}]: value from [{metadata[key]}] to [{substitued_metadata[key]}]")
+        return substitued_metadata
+
     @classmethod
     def load(cls, path: Path) -> "Document":
         """Load a document.
@@ -139,14 +176,14 @@ def load(cls, path: Path) -> "Document":
         content = StringIO()
 
         with settings.JFME_DEFAULT_METADATA_FILEPATH.open() as f:
-            for line in f :
-                try :
+            for line in f:
+                try:
                     # Parse a metadata key value pair
                     key, value = cls.parse_metadata_line(line)
                     metadata[key] = value
-                except EmptyLine : # ignore empty lines
+                except EmptyLine:  # ignore empty lines
                     continue
-                except CommentLine : # ignore comment lines
+                except CommentLine:  # ignore comment lines
                     continue
 
         with path.open() as f:
@@ -171,29 +208,27 @@ def load(cls, path: Path) -> "Document":
                         # Metadata end block found
                         state = 2
                     else:
-                        try :
+                        try:
                             # Parse a metadata key value pair
                             key, value = cls.parse_metadata_line(line)
                             metadata[key] = value
-                        except EmptyLine : # ignore empty lines
+                        except EmptyLine:  # ignore empty lines
                             continue
-                        except CommentLine : # ignore comment lines
+                        except CommentLine:  # ignore comment lines
                             continue
                 elif state == 2:
                     if line.rstrip().startswith("---"):
                         # data end block found
                         # FIXME print("json reading finished: {}".format(json_data))
-                        data = json.loads(json_data)
+                        # data = json.loads(json_data)
                         state = 3
                     else:
-                        if line.strip() == "":
-                            continue  # remove empty lines
-                        if line.startswith("#"):
-                            continue  # remove comment lines
-
-                        # FIXME print("json data: append {}".format(line))
-                        json_data += line
-
+                        try:
+                            json_data += cls.parse_json_data_line(line)
+                        except EmptyLine:  # ignore empty lines
+                            continue
+                        except CommentLine:  # ignore comment lines
+                            continue
                 else:
                     # Read the content
                     content.write(line)
@@ -209,9 +244,9 @@ def load(cls, path: Path) -> "Document":
                 f"Document {path.resolve()}'s meta-data block doesn't have an end"
             )
 
+        metadata = cls.process_metadata_substitutions(metadata)
         metadata["path"] = path
-        metadata["json"] = json_data
-        metadata["data"] = data
+        metadata["data"] = data = json.loads(json_data)  # keep 'data' bound: 'obj.data = data' below still reads it
 
         obj = cls(content=content.getvalue(), **metadata)
         obj.data = data
@@ -219,7 +254,7 @@ def load(cls, path: Path) -> "Document":
 
     @classmethod
     def load_glob(
-        cls, path: Optional[List[Path]] = None, dir = "", glob: str = "*.html", all=False
+            cls, path: Optional[List[Path]] = None, dir="", glob: str = "*.html", all=False
     ) -> Iterator["Document"]:
         """Load multiple document.
 
@@ -232,27 +267,29 @@ def load_glob(
 
         if path is None:
             raise RuntimeError("No path and no self.BASE_DIR defined")
-        
+
         files = []
-        for p in path :
-            if all :
+        for p in path:
+            if all:
                 files += (p / dir).rglob(glob)
-            else :
+            else:
                 files += (p / dir).glob(glob)
         return map(cls.load, files)
-    
+
     @classmethod
-    def make_imports(cls) :
-        
+    def make_imports(cls):
+
         import_str = ""
-        for template_dir in settings.JFME_TEMPLATES_DIRS :
-            for widget_file in (template_dir / "jinja2" / "widgets").rglob("*") :
-                if widget_file.is_file() :
-                    import_str += "{% " + "import '{}' as {}".format(widget_file.relative_to(template_dir / "jinja2"), widget_file.stem) + " %}\n"
+        for template_dir in settings.JFME_TEMPLATES_DIRS:
+            for widget_file in (template_dir / "jinja2" / "widgets").rglob("*"):
+                if widget_file.is_file():
+                    import_str += "{% " + "import '{}' as {}".format(
+                        widget_file.relative_to(template_dir / "jinja2"),
+                        widget_file.stem) + " %}\n"
         return import_str
 
     @classmethod
-    def parse_metadata_line(cls, line) :
+    def parse_metadata_line(cls, line):
         if line.strip() == "":  # ignore empty lines
             raise EmptyLine()
         if line.startswith("#"):  # ignore comment lines
@@ -260,6 +297,16 @@ def parse_metadata_line(cls, line) :
         # key, value = map(str.strip, line.split("", maxsplit=1))
         return map(str.strip, re.split("[\s]", line, maxsplit=1))
 
+    @classmethod
+    def parse_json_data_line(cls, line: str):
+        # TODO - D.A. - 2024-12-15 use an efficient json5 parser
+        if line.strip() == "":  # whitespace-only line: signalled to the caller with EmptyLine
+            raise EmptyLine()
+        if line.strip().startswith("#"):  # "#" comment line, even if indented: signalled with CommentLine
+            raise CommentLine(line)
+        return line  # any other line is returned unchanged
+
+
 class Page(Document):
     """A webpage, with a title and some content."""
 
@@ -280,33 +327,36 @@ def __init__(self, content: str, **metadata) -> None:
             self.slug = slugify(self.title)
 
         self.content_page_dir = self.path
-        while (self.content_page_dir not in self.BASE_DIR) :
+        while (self.content_page_dir not in self.BASE_DIR):
             self.content_page_dir = self.content_page_dir.parent
 
         # page folder path relative to its content_page_dir
         self.rel_folder_path = str(self.path.relative_to(self.content_page_dir).parent)
-        if self.rel_folder_path == '.' :
+        if self.rel_folder_path == '.':
             self.rel_folder_path = ''
 
     @classmethod
-    def load_page_with_slug(cls, slug: str, dir : str) -> "Page":
+    def load_page_with_slug(cls, slug: str, dir: str) -> "Page":
         # for page in list(cls.load_glob(dir=dir)):
         #     print("Search for SLUG {slug} in {dir}, compare with page {pageslug}".format(slug=slug, dir=dir, pageslug=page.slug))
         #     if page.slug == slug:
         #        return page
 
-        return next(filter(lambda p: p.slug == slug, cls.load_glob(dir = dir)))
+        return next(filter(lambda p: p.slug == slug, cls.load_glob(dir=dir)))
 
     @classmethod
     def load_glob(
-        cls, path: Optional[List[Path]] = None, dir = "", glob: str = "*.html", all = False
+            cls, path: Optional[List[Path]] = None, dir="", glob: str = "*.html", all=False
     ) -> Iterator["Page"]:
         """Overridden only to make the static typing happy."""
         return super().load_glob(path, dir, glob, all)
-    
+
     @classmethod
-    def get_pages(cls) :
-        return ({"slug": p.slug} if p.rel_folder_path == '' else {"dir": p.rel_folder_path, "slug" : p.slug} for p in Page.load_glob(all = True))
+    def get_pages(cls):
+        return (
+            {"slug": p.slug} if p.rel_folder_path == '' else {"dir": p.rel_folder_path,
+                                                              "slug": p.slug}
+            for p in Page.load_glob(all=True))
 
 
 class Post(Page):
@@ -322,77 +372,84 @@ def __init__(self, content: str, **metadata) -> None:
         """
         super().__init__(content, **metadata)
         self.timestamp = datetime.datetime.fromisoformat(metadata["date"])
-        if "category" in self.metadata :
+        if "category" in self.metadata:
             self.metadata["category"] = slugify(self.metadata["category"])
-        else :
+        else:
             self.metadata["category"] = ""
 
     @classmethod
     def load_glob(
-        cls, path: Optional[List[Path]] = None, dir = "", glob: str = "*.md", all = False
+            cls, path: Optional[List[Path]] = None, dir="", glob: str = "*.md", all=False
     ) -> Iterator["Post"]:
         """Overridden only to make the static typing happy."""
         return super().load_glob(path, dir, glob, all)
-    
+
     @classmethod
-    def get_posts(cls) :
-        return ({"slug": p.slug} if p.rel_folder_path == '' else {"dir": p.rel_folder_path, "slug" : p.slug} for p in Post.load_glob(all = True))
+    def get_posts(cls):
+        return (
+            {"slug": p.slug} if p.rel_folder_path == '' else {"dir": p.rel_folder_path,
+                                                              "slug": p.slug}
+            for p in Post.load_glob(all=True))
 
-class PostList :
-    metadata = {"page_header_h1":"Posts"}
+
+class PostList:
+    metadata = {"page_header_h1": "Posts"}
     category = ""
 
-    def __init__(self, category = "", page = 1) -> None:
+    def __init__(self, category="", page=1) -> None:
         self.category = category
         self.page = page
-        
-        if category == "" :
-            nb_posts = len(list(Post.load_glob(all = True)))
-        else :
-            nb_posts = len(list(filter(lambda p: p.metadata["category"] == self.category, Post.load_glob(all=True))))
 
-        if settings.JFME_NUMBER_OF_POSTS_BY_PAGE > 0 :
+        if category == "":
+            nb_posts = len(list(Post.load_glob(all=True)))
+        else:
+            nb_posts = len(list(filter(lambda p: p.metadata["category"] == self.category,
+                                       Post.load_glob(all=True))))
+
+        if settings.JFME_NUMBER_OF_POSTS_BY_PAGE > 0:
             self.posts_by_page = settings.JFME_NUMBER_OF_POSTS_BY_PAGE
-        else :
+        else:
             self.posts_by_page = nb_posts
 
         if self.posts_by_page > 0:
-            self.nb_pages = ceil(nb_posts / self.posts_by_page) # number of posts / number of posts by page
+            self.nb_pages = ceil(
+                nb_posts / self.posts_by_page)  # number of posts / number of posts by page
 
     @classmethod
-    def load_post_list_with_category(cls, category, page) :
+    def load_post_list_with_category(cls, category, page):
         return cls(category, page)
 
     @property
-    def categories(self) :
+    def categories(self):
         cat = set()
-        for post in Post.load_glob(all = True) :
-            if post.metadata["category"] != "" :
+        for post in Post.load_glob(all=True):
+            if post.metadata["category"] != "":
                 cat.add(post.metadata["category"])
         return sorted(cat)
 
     @classmethod
-    def get_categories_and_pages(cls) :
+    def get_categories_and_pages(cls):
         t = []
-        for category in cls().categories :
-            t += [{"category": category, "page":page} for page in range(1, cls(category).nb_pages + 1)]
+        for category in cls().categories:
+            t += [{"category": category, "page": page} for page in
+                  range(1, cls(category).nb_pages + 1)]
         return t
-        
-    def get_postlists(cls) :
+
+    def get_postlists(cls):
         return cls.get_categories_and_pages() + cls.get_pages()
 
     @classmethod
-    def get_pages(cls) :
-        if len(list(Post.load_glob(all = True))) > 0 :
-            return [{"page": page} for page in range(1, cls().nb_pages+1)]
-        else : 
+    def get_pages(cls):
+        if len(list(Post.load_glob(all=True))) > 0:
+            return [{"page": page} for page in range(1, cls().nb_pages + 1)]
+        else:
             return []
 
     @property
-    def posts(self) :
+    def posts(self):
         posts = sorted(Post.load_glob(all=True), key=lambda p: p.timestamp, reverse=True)
-        if self.category == "" :
-            return posts[self.posts_by_page*(self.page-1):self.posts_by_page*(self.page)]
-        else :
-            return list(filter(lambda p: p.metadata["category"] == self.category, posts))[self.posts_by_page*(self.page-1):self.posts_by_page*(self.page)]
-    
+        if self.category == "":
+            return posts[self.posts_by_page * (self.page - 1):self.posts_by_page * (self.page)]
+        else:
+            return list(filter(lambda p: p.metadata["category"] == self.category, posts))[
+                   self.posts_by_page * (self.page - 1):self.posts_by_page * (self.page)]