feat: Add replace between indices on ExtendParagraph (#39)

childmindresearch · Jan 2, 2025 · c18a50f · c18a50f
1 parent a36e4e7
commit c18a50f
Show file tree

Hide file tree

Showing 3 changed files with 89 additions and 9 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "cmi-docx"
-version = "0.3.7"
+version = "0.3.8"
 description = "Additional tooling for Python-docx."
 readme = "README.md"
 requires-python = ">=3.10"

diff --git a/src/cmi_docx/paragraph.py b/src/cmi_docx/paragraph.py
@@ -13,7 +13,7 @@
 
 @dataclasses.dataclass
 class FindParagraph:
-    """Data class for maintaing find results in paragraphs.
+    """Data class for maintaining find results in paragraphs.
 
     Attributes:
         paragraph: The paragraph containing the text.
@@ -71,23 +71,24 @@ def find_in_runs(self, needle: str) -> list[run.FindRun]:
         run_lengths = [len(run.text) for run in self.paragraph.runs]
         cumulative_run_lengths = list(itertools.accumulate(run_lengths))
 
-        for occurence in self.find_in_paragraph(needle).character_indices:
-            start_run = bisect.bisect_right(cumulative_run_lengths, occurence[0])
+        for occurrence in self.find_in_paragraph(needle).character_indices:
+            start_run = bisect.bisect_right(cumulative_run_lengths, occurrence[0])
             end_run = bisect.bisect_right(
                 cumulative_run_lengths[:-1],
-                occurence[1] - 1,  # -1 as the range does not include the last character
+                occurrence[1]
+                - 1,  # -1 as the range does not include the last character
                 lo=start_run,
             )
 
             start_index = (
-                occurence[0] - cumulative_run_lengths[start_run - 1]
+                occurrence[0] - cumulative_run_lengths[start_run - 1]
                 if start_run > 0
-                else occurence[0]
+                else occurrence[0]
             )
             end_index = (
-                occurence[1] - cumulative_run_lengths[end_run - 1]
+                occurrence[1] - cumulative_run_lengths[end_run - 1]
                 if end_run > 0
-                else occurence[1]
+                else occurrence[1]
             )
 
             run_finds.append(
@@ -117,6 +118,54 @@ def replace(
         for run_find in run_finder:
             run_find.replace(replace, style)
 
+    def replace_between(
+        self, start: int, end: int, replace: str, style: styles.RunStyle | None = None
+    ) -> None:
+        """Replace text between indices.
+
+        Args:
+            start: The index of the first character to replace (inclusive).
+            end: The index just past the last character to replace (exclusive).
+            replace: The text to insert.
+            style: The style to apply to the replacement text. If None, matches
+                the style of the first run in the replacement window.
+        """
+        cumulative_run_lengths = list(
+            itertools.accumulate(
+                (len(run.text) for run in self.paragraph.runs), initial=0
+            )
+        )
+        start_run_index = bisect.bisect_right(cumulative_run_lengths, start) - 1
+        end_run_index = bisect.bisect_right(cumulative_run_lengths, end) - 1
+
+        for index in range(start_run_index + 1, end_run_index):
+            self.paragraph.runs[index].text = ""
+
+        start_run = self.paragraph.runs[start_run_index]
+        end_run = self.paragraph.runs[end_run_index]
+
+        if end_run_index != start_run_index:
+            remainder = (
+                end
+                - cumulative_run_lengths[end_run_index]
+                + cumulative_run_lengths[start_run_index]
+            )
+            end_run.text = end_run.text[remainder:]
+            after_text = None
+        else:
+            after_text = start_run.text[end - cumulative_run_lengths[end_run_index] :]
+
+        start_run.text = start_run.text[
+            : start - cumulative_run_lengths[start_run_index]
+        ]
+        if style is None:
+            style = run.ExtendRun(start_run).get_format()
+        self.insert_run(start_run_index + 1, replace, style)
+        if after_text:
+            self.insert_run(
+                start_run_index + 2, after_text, run.ExtendRun(start_run).get_format()
+            )
+
     def insert_run(self, index: int, text: str, style: styles.RunStyle) -> docx_run.Run:
         """Inserts a run into a paragraph.
 

diff --git a/tests/test_paragraph.py b/tests/test_paragraph.py
@@ -152,3 +152,34 @@ def test_insert_run_empty() -> None:
 
     assert para.text == "Hello"
     assert para.runs[0].bold
+
+
+def test_replace_between_one_run() -> None:
+    """Test replacing text in one run."""
+    document = docx.Document()
+    para = document.add_paragraph("This is a sample paragraph.")
+    extend_paragraph = paragraph.ExtendParagraph(para)
+
+    extend_paragraph.replace_between(5, 7, "was")
+
+    assert para.text == "This was a sample paragraph."
+    assert para.runs[0].text == "This "
+    assert para.runs[1].text == "was"
+    assert para.runs[2].text == " a sample paragraph."
+
+
+def test_replace_between_multiple_runs() -> None:
+    """Test replacing text in multiple runs."""
+    document = docx.Document()
+    para = document.add_paragraph("This")
+    para.add_run(" is ")
+    para.add_run(" Sparta!")
+    extend_paragraph = paragraph.ExtendParagraph(para)
+
+    extend_paragraph.replace_between(3, 11, "nk sm")
+
+    assert para.text == "Think smarta!"
+    assert para.runs[0].text == "Thi"
+    assert para.runs[1].text == "nk sm"
+    assert para.runs[2].text == ""  # Remnant of the ' is ' run.
+    assert para.runs[3].text == "arta!"