Skip to content

Commit

Permalink
feat: Add replace between indices on ExtendParagraph (#39)
Browse files Browse the repository at this point in the history
  • Loading branch information
ReinderVosDeWael authored Jan 2, 2025
1 parent a36e4e7 commit c18a50f
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "cmi-docx"
version = "0.3.7"
version = "0.3.8"
description = "Additional tooling for Python-docx."
readme = "README.md"
requires-python = ">=3.10"
Expand Down
65 changes: 57 additions & 8 deletions src/cmi_docx/paragraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

@dataclasses.dataclass
class FindParagraph:
"""Data class for maintaing find results in paragraphs.
"""Data class for maintaining find results in paragraphs.
Attributes:
paragraph: The paragraph containing the text.
Expand Down Expand Up @@ -71,23 +71,24 @@ def find_in_runs(self, needle: str) -> list[run.FindRun]:
run_lengths = [len(run.text) for run in self.paragraph.runs]
cumulative_run_lengths = list(itertools.accumulate(run_lengths))

for occurence in self.find_in_paragraph(needle).character_indices:
start_run = bisect.bisect_right(cumulative_run_lengths, occurence[0])
for occurrence in self.find_in_paragraph(needle).character_indices:
start_run = bisect.bisect_right(cumulative_run_lengths, occurrence[0])
end_run = bisect.bisect_right(
cumulative_run_lengths[:-1],
occurence[1] - 1, # -1 as the range does not include the last character
occurrence[1]
- 1, # -1 as the range does not include the last character
lo=start_run,
)

start_index = (
occurence[0] - cumulative_run_lengths[start_run - 1]
occurrence[0] - cumulative_run_lengths[start_run - 1]
if start_run > 0
else occurence[0]
else occurrence[0]
)
end_index = (
occurence[1] - cumulative_run_lengths[end_run - 1]
occurrence[1] - cumulative_run_lengths[end_run - 1]
if end_run > 0
else occurence[1]
else occurrence[1]
)

run_finds.append(
Expand Down Expand Up @@ -117,6 +118,54 @@ def replace(
for run_find in run_finder:
run_find.replace(replace, style)

def replace_between(
    self, start: int, end: int, replace: str, style: styles.RunStyle | None = None
) -> None:
    """Replace the text between two character indices of the paragraph.

    Indices count characters across the concatenated text of all runs. The
    window is half-open, like a Python slice: ``start`` is the first
    character replaced and ``end`` is the first character that is kept.
    Indices beyond the end of the paragraph are treated as the end of the
    paragraph.

    Runs lying wholly inside the window are emptied rather than removed. The
    replacement is inserted as a new run directly after the run in which the
    window starts. If the window also ends inside that run, the text that
    follows the window is moved to a further new run after the replacement.

    Args:
        start: Index of the first character to replace.
        end: Index one past the last character to replace.
        replace: The text to insert.
        style: The style to apply to the replacement text. If None, matches
            the style of the run in which the replacement window starts.

    Raises:
        ValueError: If start is negative or end is smaller than start.
    """
    if start < 0 or end < start:
        msg = f"Invalid replacement window: start={start}, end={end}."
        raise ValueError(msg)

    runs = self.paragraph.runs
    # Entry i is the paragraph-level offset at which run i begins; the final
    # entry is the total length of the paragraph text.
    run_offsets = list(
        itertools.accumulate((len(item.text) for item in runs), initial=0)
    )

    # bisect_right selects the last run that begins at or before the index.
    # An index at (or past) the end of the paragraph would land one past the
    # final run, so clamp it back onto the final run.
    last_run_index = len(runs) - 1
    start_run_index = min(
        bisect.bisect_right(run_offsets, start) - 1, last_run_index
    )
    end_run_index = min(bisect.bisect_right(run_offsets, end) - 1, last_run_index)

    # Runs strictly between the first and last affected run are fully covered.
    for index in range(start_run_index + 1, end_run_index):
        runs[index].text = ""

    start_run = runs[start_run_index]
    end_run = runs[end_run_index]

    # Offset of the window's end relative to the start of the run it falls in.
    end_offset = end - run_offsets[end_run_index]
    if end_run_index != start_run_index:
        # Drop the leading part of the end run that lies inside the window.
        end_run.text = end_run.text[end_offset:]
        after_text = None
    else:
        # The window ends inside the start run: remember the tail before the
        # start run is truncated below.
        after_text = start_run.text[end_offset:]

    start_run.text = start_run.text[: start - run_offsets[start_run_index]]
    if style is None:
        style = run.ExtendRun(start_run).get_format()
    self.insert_run(start_run_index + 1, replace, style)
    if after_text:
        # The tail keeps the formatting of the run it was split from.
        self.insert_run(
            start_run_index + 2, after_text, run.ExtendRun(start_run).get_format()
        )

def insert_run(self, index: int, text: str, style: styles.RunStyle) -> docx_run.Run:
"""Inserts a run into a paragraph.
Expand Down
31 changes: 31 additions & 0 deletions tests/test_paragraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,34 @@ def test_insert_run_empty() -> None:

assert para.text == "Hello"
assert para.runs[0].bold


def test_replace_between_one_run() -> None:
    """Replacing inside a single run yields before, replacement and after runs."""
    doc = docx.Document()
    target = doc.add_paragraph("This is a sample paragraph.")

    paragraph.ExtendParagraph(target).replace_between(5, 7, "was")

    assert target.text == "This was a sample paragraph."
    expected_run_texts = ("This ", "was", " a sample paragraph.")
    for position, expected in enumerate(expected_run_texts):
        assert target.runs[position].text == expected


def test_replace_between_multiple_runs() -> None:
    """Replacing a window that spans several runs rewrites each affected run."""
    doc = docx.Document()
    target = doc.add_paragraph("This")
    for extra_text in (" is ", " Sparta!"):
        target.add_run(extra_text)

    paragraph.ExtendParagraph(target).replace_between(3, 11, "nk sm")

    assert target.text == "Think smarta!"
    # The empty third entry is the remnant of the ' is ' run.
    expected_run_texts = ("Thi", "nk sm", "", "arta!")
    for position, expected in enumerate(expected_run_texts):
        assert target.runs[position].text == expected

0 comments on commit c18a50f

Please sign in to comment.