From 77e3a08f438374562053f6f1b670bfe28570f331 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sondre=20Gr=C3=B8n=C3=A5s?= <sondre.gronas@gmail.com>
Date: Sat, 24 Feb 2024 07:30:53 +0100
Subject: [PATCH] Add support for content blocks & fix breakless_lists #11

---
 pyproject.toml               |  2 +-
 src/mkdocs_callouts/utils.py | 71 +++++++++++++++++++++++-------------
 tests/test_plugin.py         | 34 +++++++++++++++++
 3 files changed, 80 insertions(+), 27 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 3d729ac..78e191d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mkdocs-callouts"
-version = "1.12.0"
+version = "1.13.0"
 keywords = ["mkdocs", "mkdocs-plugin", "markdown", "callouts", "admonitions", "obsidian"]
 description = "A simple plugin that converts Obsidian style callouts and converts them into mkdocs supported 'admonitions' (a.k.a. callouts)."
 readme = "README.md"
diff --git a/src/mkdocs_callouts/utils.py b/src/mkdocs_callouts/utils.py
index d2609c4..fddfa4c 100644
--- a/src/mkdocs_callouts/utils.py
+++ b/src/mkdocs_callouts/utils.py
@@ -1,13 +1,15 @@
 import re
 
-CALLOUT_BLOCK_REGEX = re.compile(r'^ ?((?:> ?)+) *\[!([^\]]*)\]([\-\+]?)(.*)?')
-# (1): indents (all leading '>' symbols)
-# (2): callout type ([!'capture'] or [!'capture | attribute'] excl. brackets and leading !)
-# (3): foldable token (+ or - or <blank>)
-# (4): title
+CALLOUT_BLOCK_REGEX = re.compile(r'^(\s*)((?:> ?)+) *\[!([^\]]*)\]([\-\+]?)(.*)?')
+# (1): leading whitespace (tabs and runs of 4 spaces are kept as tab indentation; other whitespace is dropped)
+# (2): indents (all leading '>' symbols)
+# (3): callout type ([!'capture'] or [!'capture | attribute'] excl. brackets and leading !)
+# (4): foldable token (+ or - or <blank>)
+# (5): title
 
-CALLOUT_CONTENT_SYNTAX_REGEX = re.compile(r'^ ?((?:> ?)+) ?')
-# (1): indents (all leading '>' symbols)
+CALLOUT_CONTENT_SYNTAX_REGEX = re.compile(r'^(\s*)((?:> ?)+)')
+# (1): leading whitespace (tabs and runs of 4 spaces are kept as tab indentation; other whitespace is dropped)
+# (2): indents (all leading '>' symbols)
 
 
 class CalloutParser:
@@ -46,12 +48,16 @@ def _parse_block_syntax(self, block) -> str:
         Converts the callout syntax from obsidian into the mkdocs syntax
         Takes an argument block, which is a regex match.
         """
-        # Group 1: Leading > symbols (indentation, for nested callouts)
-        indent = block.group(1).count('>')
+        # Group 1: Leading whitespace (we need to reuse tabs and 4x spaces)
+        whitespace = block.group(1).replace('    ', '\t')
+        whitespace = re.sub(r'[^\t]', '', whitespace)
+
+        # Group 2: Leading > symbols (indentation, for nested callouts)
+        indent = block.group(2).count('>')
         indent = '\t' * (indent - 1)
 
-        # Group 2: Callout block type (note, warning, info, etc.) + inline block syntax
-        c_type = block.group(2).lower()
+        # Group 3: Callout block type (note, warning, info, etc.) + inline block syntax
+        c_type = block.group(3).lower()
         c_type = re.sub(r' *\| *(inline|left) *$', ' inline', c_type)
         c_type = re.sub(r' *\| *(inline end|right) *$', ' inline end', c_type)
         c_type = re.sub(r' *\|.*', '', c_type)
@@ -59,16 +65,16 @@ def _parse_block_syntax(self, block) -> str:
         if self.convert_aliases:
             c_type = self._convert_aliases(c_type)
 
-        # Group 3: Foldable callouts
+        # Group 4: Foldable callouts
         syntax = {'-': '???', '+': '???+'}
-        syntax = syntax.get(block.group(3), '!!!')
+        syntax = syntax.get(block.group(4), '!!!')
 
-        # Group 4: Title, add leading whitespace and quotation marks, if it exists
-        title = block.group(4).strip()
+        # Group 5: Title, add leading whitespace and quotation marks, if it exists
+        title = block.group(5).strip()
         title = f' "{title}"' if title else ''
 
         # Construct the new callout syntax ({indent}!!! note "Title")
-        return f'{indent}{syntax} {c_type}{title}'
+        return f'{whitespace}{indent}{syntax} {c_type}{title}'
 
     @staticmethod
     def _convert_aliases(c_type: str) -> str:
@@ -84,7 +90,7 @@ def _breakless_list_handler(self, line: str) -> str:
         This is a workaround for Obsidian's default behavior, which allows for lists to be created
         without a blank line between them.
         """
-        is_list = re.search(r'^\s*(?:[-+*])|(?:\d+\.)\s', line)
+        is_list = re.search(r'^\s*(?:[-+*]|\d+\.)\s', line)
         if is_list and self.text_in_prev_line:
             # If the previous line was a list, keep the line as is
             if self.list_in_prev_line:
@@ -103,7 +109,7 @@ def _convert_block(self, line: str) -> str:
         match = re.search(CALLOUT_BLOCK_REGEX, line)
         if match:
             # Store the current indent level and add it to the list if it doesn't exist
-            indent_level = match.group(1).count('>')
+            indent_level = match.group(2).count('>')
             if indent_level not in self.indent_levels:
                 self.indent_levels.append(indent_level)
             return self._parse_block_syntax(match)
@@ -116,17 +122,26 @@ def _convert_content(self, line: str) -> str:
         """
         match = re.search(CALLOUT_CONTENT_SYNTAX_REGEX, line)
         if match and self.indent_levels:
-            # Get the last indent level and remove any higher levels when the current line
+            # Group 1: Leading whitespace (we need to reuse tabs and 4x spaces)
+            whitespace = match.group(1).replace('    ', '\t')
+            whitespace = re.sub(r'[^\t]', '', whitespace)
+
+            # Remove any higher levels whilst the current line
             # has a lower indent level than the last line.
-            while match.group(1).count('>') < self.indent_levels[-1]:
+            while match.group(2).count('>') < self.indent_levels[-1]:
                 self.indent_levels = self.indent_levels[:-1]
-            indent = '\t' * self.indent_levels[-1]
-            line = re.sub(rf'^ ?(?:> ?){{{self.indent_levels[-1]}}} ?', indent, line)
+            indent = '\t' * (self.indent_levels[-1] + whitespace.count('\t'))
+            line = re.sub(rf'^\s*(?:> ?){{{self.indent_levels[-1]}}} ?', indent, line)
+
+            # Handle breakless lists before returning the line, if enabled
+            if self.breakless_lists:
+                line = self._breakless_list_handler(line)
         else:
+            # Reset the relevant variables
             self.indent_levels = list()
-        # Handle breakless lists before returning the line, if enabled
-        if self.breakless_lists:
-            line = self._breakless_list_handler(line)
+            # These are unused if breakless_lists is disabled
+            self.list_in_prev_line = False
+            self.text_in_prev_line = False
         return line
 
     def convert_line(self, line: str) -> str:
@@ -135,7 +150,11 @@ def convert_line(self, line: str) -> str:
         returns _convert_block if line matches that of a callout block syntax,
         if line is not a block syntax, it will return _convert_content.
         """
-        if line.startswith('```'):
+        # Toggle in_codefence if line contains a codefence
+        if re.match(r'^\s*```', line):
+            # TODO: Known gap: callout syntax inside a codefence that is itself inside a callout block
+            #       still gets converted. Fixing this might be _almost_ impossible, and the scenario
+            #       is extremely unlikely in practice, but it is worth noting.
             self.in_codefence = not self.in_codefence
         if self.in_codefence:
             # Reset the indent levels if the callout is inside a codefence
diff --git a/tests/test_plugin.py b/tests/test_plugin.py
index 7725c91..263df9c 100644
--- a/tests/test_plugin.py
+++ b/tests/test_plugin.py
@@ -245,12 +245,19 @@ def test_breakless_lists():
     result = '!!! info\n\ttext\n\t\n\t1. item 1\n\t2. item 2'
     assert (parser.parse(mkdown) == result)
 
+    # Non-lists that look like lists
+    mkdown = '> [!INFO]\n> text\n> *Not a list*\n> text\n> *: Not a list'
+    result = '!!! info\n\ttext\n\t*Not a list*\n\ttext\n\t*: Not a list'
+    assert (parser.parse(mkdown) == result)
+
+
 def test_edgecase_in_nested_callouts():
     # Go from 1, 2, 3 callouts back to 1
     mkdown = '> [!INFO]\n> > [!INFO]\n> > > [!INFO]\n> > > Text\n> Text'
     result = '!!! info\n\t!!! info\n\t\t!!! info\n\t\t\tText\n\tText'
     assert (convert(mkdown) == result)
 
+
 def test_callout_in_codeblocks():
     mkdown = '```markdown\n> [!INFO]\n> Text\n```'
     assert (convert(mkdown) == mkdown)
@@ -265,6 +272,7 @@ def test_callout_in_codeblocks():
     assert (convert(mkdown) == result)
 
 
+# TODO: We could handle this edge case, but it's probably not worth the effort
 @pytest.mark.xfail
 def test_callout_in_codeblocks_within_callout():
     # A codefence within a callout containing a callout will still be converted
@@ -272,4 +280,30 @@ def test_callout_in_codeblocks_within_callout():
     # Given how unlikely it is to occur in practice
     mkdown = '> [!INFO]\n> ```\n> [!INFO]\n> ```'
     result = '!!! info\n\t```\n> [!INFO]\n\t```'
+    assert (convert(mkdown) == result)
+
+
+def test_content_tabs():
+    # Spaces are converted to tabs, which get preserved in the output
+    mkdown = '=== "rendered"\n\n    > [!note] Custom title here\n    > Lorem ipsum dolor sit amet, consectetur adipiscing elit.'
+    result = '=== "rendered"\n\n\t!!! note "Custom title here"\n\t\tLorem ipsum dolor sit amet, consectetur adipiscing elit.'
+    assert (convert(mkdown) == result)
+
+    # Codefences don't get converted
+    mkdown = '=== "source"\n\n    ```markdown\n    > [!note] Custom title here\n    > Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n    ```'
+    result = '=== "source"\n\n    ```markdown\n    > [!note] Custom title here\n    > Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n    ```'
+    assert (convert(mkdown) == result)
+
+    # Embedded content works as well
+    mkdown = '> [!NOTE]\n>\t=== "test"'
+    result = '!!! note\n\t\t=== "test"'
+    assert (convert(mkdown) == result)
+
+    # Embedded content with nested callouts will "work", but the formatting isn't really intuitive
+    # (the content tab's callout can't be nested using '>'; its indentation has to be tabs or spaces).
+    # This is a very unlikely edge case, though, and one should probably just use the standard syntax
+    # shown in the documentation (https://squidfunk.github.io/mkdocs-material/reference/content-tabs/#embedded-content)
+    # Example:
+    mkdown = '> [!NOTE]\n\t=== "test"\n\t\t> [!NOTE]\n\t\t> Text'
+    result = '!!! note\n\t=== "test"\n\t\t!!! note\n\t\t\tText'
     assert (convert(mkdown) == result)
\ No newline at end of file