v0.4.4 (#13)
- **Work on supporting images**
- **Fix several issues regarding SITE_URL, WikiLinks, and images**
jonaprieto authored Nov 26, 2024
1 parent 8893a35 commit 46bc7d2
Showing 16 changed files with 190 additions and 88 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -73,7 +73,7 @@ jobs:
run: juvix-mkdocs build -p my-juvix-project
env:
NO_INTERACTION: true
SITE_URL: https://anoma.github.io/juvix-mkdocs
SITE_URL: https://anoma.github.io/juvix-mkdocs/
SITE_NAME: Juvix MkDocs
- if: success()
uses: JamesIves/[email protected]
47 changes: 24 additions & 23 deletions mkdocs_juvix/common/preprocesors/links.py
@@ -4,6 +4,7 @@
from typing import Any, List, Optional, Tuple
from urllib.parse import urljoin

from colorama import Fore, Style
import numpy as np # type: ignore
from fuzzywuzzy import fuzz # type: ignore
from markdown.preprocessors import Preprocessor # type: ignore
@@ -162,7 +163,6 @@ def process_wikilink(config, full_text, match, md_filepath) -> Optional[WikiLink

return link


class WLPreprocessor(Preprocessor):
absolute_path: Optional[Path] = None
relative_path: Optional[Path] = None
@@ -188,11 +188,11 @@ def _run(self, content: str) -> str:
and self.url is None
):
raise ValueError("No absolute path, relative path, or URL provided")

# Find all code blocks, HTML comments, and script tags in a single pass
ignore_blocks = re.compile(
r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)", re.DOTALL
r"((`{1,3})(?:[\s\S]*?)(\2)|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)", re.DOTALL
)

intervals = []
try:
for match in ignore_blocks.finditer(content):
@@ -203,35 +203,36 @@ def _run(self, content: str) -> str:
except Exception as e:
log.error(f"Error occurred while processing ignore patterns: {str(e)}")
return content

# intervals_where_not_to_look = None
# if intervals:
# starts, ends, ids = map(np.array, zip(*intervals))
# intervals_where_not_to_look = NCLS(starts, ends, ids)
intervals_where_not_to_look = None
if intervals:
starts, ends, ids = map(np.array, zip(*intervals))
intervals_where_not_to_look = NCLS(starts, ends, ids)

# Find all wikilinks
str_wikilinks = list(WIKILINK_PATTERN.finditer(content))
log.debug(f"Found {len(str_wikilinks)} wikilinks")
log.debug(f"{Fore.CYAN}Found {len(str_wikilinks)} wikilinks{Style.RESET_ALL}")
replacements = []
for m in str_wikilinks:
start, end = m.start(), m.end()

# TODO: review this
# if intervals_where_not_to_look and not list(
# intervals_where_not_to_look.find_overlap(start, end)
# ):
link: Optional[WikiLink] = process_wikilink(
self.config, content, m, self.absolute_path
)
log.debug(f"Processing wikilink: {link}")
if link is not None:
replacements.append(
(
start,
end,
link.markdown(),
)
if intervals_where_not_to_look and not list(
intervals_where_not_to_look.find_overlap(start, end)
):
log.debug(f"{Fore.YELLOW}Processing wikilink: {m.group(0)}{Style.RESET_ALL}")
link: Optional[WikiLink] = process_wikilink(
self.config, content, m, self.absolute_path
)
replacement = (start, end, link.markdown()) if link is not None else None
if replacement is not None:
replacements.append(replacement)
log.debug(f"{Fore.YELLOW}Processed replacement: {replacement}{Style.RESET_ALL}")
else:
log.debug(
f"{Fore.YELLOW}Link was not processed: {m.group(0)}{Style.RESET_ALL}"
)
else:
log.debug(f"{Fore.YELLOW}Skipping wikilink: {m.group(0)}{Style.RESET_ALL}")
for start, end, new_text in reversed(replacements):
content = content[:start] + new_text + content[end:]
return content
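
Note: to illustrate what the two changes above combine to do — the ignore pattern now also catches inline code spans (the \2 backreference forces the closing backtick run to match the opening one), and the previously commented-out NCLS lookup is used again to skip wikilinks inside those regions — here is a minimal, self-contained sketch. The sample text is invented; the regex and the NCLS calls are the ones from the hunks.

import re

import numpy as np
from ncls import NCLS

# Ignore pattern from the hunk: inline code spans and fenced blocks (backtick runs
# of equal length via \2), HTML comments, and <script> blocks.
ignore_blocks = re.compile(
    r"((`{1,3})(?:[\s\S]*?)(\2)|<!--[\s\S]*?-->|<script>[\s\S]*?</script>)", re.DOTALL
)

content = "Inline `[[ignored]]` span.\n```\n[[also ignored]]\n```\nBut [[Real Page]] is kept."
intervals = [(m.start(), m.end(), i) for i, m in enumerate(ignore_blocks.finditer(content))]

# Same construction as in _run: one NCLS tree over the ignore regions.
starts, ends, ids = map(np.array, zip(*intervals))
tree = NCLS(starts, ends, ids)

for m in re.finditer(r"\[\[.*?\]\]", content):  # stand-in for WIKILINK_PATTERN
    overlap = list(tree.find_overlap(m.start(), m.end()))
    print(m.group(0), "-> skipped" if overlap else "-> processed")
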
4 changes: 3 additions & 1 deletion mkdocs_juvix/env.py
@@ -130,6 +130,8 @@ def __init__(self, config: Optional[MkDocsConfig] = None):

self.DIFF_ENABLED: bool = bool(getenv("DIFF_ENABLED", False))

if not self.SITE_URL.endswith("/"):
self.SITE_URL = self.SITE_URL + "/"
self.DIFF_BIN: str = getenv("DIFF_BIN", "diff")
self.DIFF_AVAILABLE = shutil.which(self.DIFF_BIN) is not None
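
Note: why the trailing slash matters, both here and in the ci.yml change above — urllib.parse.urljoin treats a base URL without a trailing slash as ending in a file segment and replaces it. A small check, using the site URL from the workflow and a hypothetical asset path:

from urllib.parse import urljoin

rel = "images/logo.png"  # hypothetical asset path

print(urljoin("https://anoma.github.io/juvix-mkdocs", rel))
# -> https://anoma.github.io/images/logo.png      (project segment dropped)

print(urljoin("https://anoma.github.io/juvix-mkdocs/", rel))
# -> https://anoma.github.io/juvix-mkdocs/images/logo.png
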

@@ -187,7 +189,7 @@ def __init__(self, config: Optional[MkDocsConfig] = None):
and not config.get("env_init", False)
):
try:
log.info(
log.debug(
f"{Fore.YELLOW}Removing directory {self.CACHE_ABSPATH}{Style.RESET_ALL}"
)
shutil.rmtree(self.CACHE_ABSPATH, ignore_errors=True)
47 changes: 39 additions & 8 deletions mkdocs_juvix/images.py
@@ -46,38 +46,49 @@ def time_spent(message: Optional[Any] = None, print_result: bool = False):
def process_images(
env: ENV, md: Optional[str], md_filepath: Optional[Path] = None
) -> Optional[str]:
log.debug(f"{Fore.CYAN}Starting process_images function{Style.RESET_ALL}")

def create_ignore_tree(text: str) -> Optional[Any]:
"""Create NCLS tree of regions to ignore (code blocks, comments, divs)"""
log.debug(f"{Fore.CYAN}Creating ignore tree for text regions{Style.RESET_ALL}")
ignore_pattern = re.compile(
r"(```(?:[\s\S]*?)```|<!--[\s\S]*?-->|<div>[\s\S]*?</div>)", re.DOTALL
r"((`{1,3})(?:[\s\S]*?)\2|<!--[\s\S]*?-->|<div>[\s\S]*?</div>)", re.DOTALL
)
intervals = [(m.start(), m.end(), 1) for m in ignore_pattern.finditer(text)]

if intervals:
starts, ends, ids = map(np.array, zip(*intervals))
log.debug(f"{Fore.CYAN}Ignore tree created with {len(intervals)} intervals{Style.RESET_ALL}")
return NCLS(starts, ends, ids)
log.debug(f"{Fore.CYAN}No intervals found for ignore tree{Style.RESET_ALL}")
return None

def should_process_match(tree: Optional[Any], start: int, end: int) -> bool:
"""Check if match should be processed based on ignore regions"""
return not tree or not list(tree.find_overlap(start, end))
result = not tree or not list(tree.find_overlap(start, end))
log.debug(f"{Fore.CYAN}Match processing check: {result} for range ({start}, {end}){Style.RESET_ALL}")
return result

def process_image_url(new_url, match: re.Match, html: bool = False) -> str:
url_str = match.group("url")
if not url_str:
log.debug(f"{Fore.CYAN}No URL found in match{Style.RESET_ALL}")
return ""

if html:
img_rest = match.group("rest") or "<img"
log.debug(f"{Fore.CYAN}Processing HTML image URL: {new_url}{Style.RESET_ALL}")
return f'{img_rest} src="{new_url}"'

caption = match.group("caption") or ""
log.debug(f"{Fore.CYAN}Processing Markdown image URL: {new_url}{Style.RESET_ALL}")
return f"![{caption}]({new_url})"

def find_replacements(
text: str, ignore_tree: Optional[Any], html: bool = False
) -> List[Tuple[int, int, str]]:
"""Find all image references that need to be replaced"""
log.debug(f"{Fore.CYAN}Finding replacements for image references{Style.RESET_ALL}")
replacements = []

if html:
@@ -100,39 +100,111 @@ def find_replacements(
and not url.is_absolute()
and url.parent == Path(".")
):
log.debug(f"{Fore.YELLOW}Processing image URL: {url}{Style.RESET_ALL}")
log.debug(f"{Fore.YELLOW}env.SITE_URL: {env.SITE_URL}{Style.RESET_ALL}")
log.debug(f"{Fore.YELLOW}env.IMAGES_PATH: {env.IMAGES_PATH}{Style.RESET_ALL}")
log.debug(f"{Fore.YELLOW}url.name: {url.name}{Style.RESET_ALL}")
log.debug(f"{Fore.YELLOW}env.DOCS_ABSPATH: {env.DOCS_ABSPATH}{Style.RESET_ALL}")
_image_url = env.IMAGES_PATH / url.name
if _image_url.exists() and _image_url.is_relative_to(env.DOCS_ABSPATH):
_image_url = _image_url.relative_to(env.DOCS_ABSPATH)
log.debug(f"{Fore.YELLOW}_image_url: {_image_url}{Style.RESET_ALL}")

image_url = urljoin(
env.SITE_URL,
(env.IMAGES_PATH / url.name)
.relative_to(env.DOCS_ABSPATH)
.as_posix(),
env.SITE_URL or "/" ,
_image_url.as_posix()
)
log.debug(f"{Fore.YELLOW}image_url: {image_url}{Style.RESET_ALL}")

new_text = process_image_url(
image_url,
match,
html=html,
)
replacements.append((start, end, new_text))
log.debug(f"{Fore.CYAN}Found {len(replacements)} replacements{Style.RESET_ALL}")
return replacements

if md is None:
if md_filepath is None:
log.debug(f"{Fore.CYAN}No markdown content or filepath provided{Style.RESET_ALL}")
return None
log.debug(f"{Fore.CYAN}Reading markdown content from file: {md_filepath}{Style.RESET_ALL}")
markdown_text = Path(md_filepath).read_text()
else:
log.debug(f"{Fore.CYAN}Using provided markdown content{Style.RESET_ALL}")
markdown_text = md

ignore_tree = create_ignore_tree(markdown_text)
log.debug(f"{Fore.CYAN}Created ignore tree{Style.RESET_ALL}")
log.debug(f"{Fore.CYAN}Ignore tree: {ignore_tree}{Style.RESET_ALL}")
replacements = find_replacements(markdown_text, ignore_tree, html=False)
for start, end, new_text in replacements:
log.debug(f"{Fore.CYAN}Replacement: {new_text}{Style.RESET_ALL}")
log.debug(f"{Fore.CYAN}Found {len(replacements)} replacements{Style.RESET_ALL}")
for start, end, new_url in reversed(replacements):
markdown_text = markdown_text[:start] + new_url + markdown_text[end:]

if "<img" in markdown_text:
log.debug(f"{Fore.CYAN}Processing HTML image tags in markdown content{Style.RESET_ALL}")
ignore_tree = create_ignore_tree(markdown_text)
replacements = find_replacements(markdown_text, ignore_tree, html=True)
for start, end, new_url in reversed(replacements):
markdown_text = markdown_text[:start] + new_url + markdown_text[end:]

log.debug(f"{Fore.CYAN}Finished processing images{Style.RESET_ALL}")
return markdown_text


@@ -209,7 +240,7 @@ async def process_dot_file(dot_file: Path):
if cond:
await self._generate_dot_svg(dot_file)
if svg_file.exists():
log.info(
log.debug(
f"Requested SVG for {Fore.GREEN}{dot_file.relative_to(self.env.DOCS_PATH)}{Style.RESET_ALL} "
f"agenerated: {Fore.GREEN}{svg_file.relative_to(self.env.DOCS_PATH)}{Style.RESET_ALL}"
)
@@ -230,7 +261,7 @@ async def run_in_parallel(dot_files: List[Path]):
time_start = time.time()
trio.run(run_in_parallel, dot_files)
time_end = time.time()
log.info(
log.debug(
f"SVG generation took {Fore.GREEN}{time_end - time_start:.5f}{Style.RESET_ALL} seconds"
)
self.env.FIRST_RUN = False
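
Note: the timing being downgraded to log.debug here wraps a concurrent dot-to-SVG pass; a minimal sketch of that trio pattern, with invented file names and a sleep standing in for the conversion.

import time

import trio

async def process_dot_file(dot_file: str) -> None:
    await trio.sleep(0.01)  # stand-in for the dot -> svg conversion
    print(f"generated SVG for {dot_file}")

async def run_in_parallel(dot_files: list[str]) -> None:
    async with trio.open_nursery() as nursery:
        for dot_file in dot_files:
            nursery.start_soon(process_dot_file, dot_file)

time_start = time.time()
trio.run(run_in_parallel, ["a.dot", "b.dot", "c.dot"])
print(f"SVG generation took {time.time() - time_start:.5f} seconds")
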
3 changes: 1 addition & 2 deletions mkdocs_juvix/links.py
@@ -57,7 +57,6 @@ def extendMarkdown(self, md): # noqa: N802

TOKEN_LIST_WIKILINKS: str = "<!-- list_wikilinks -->"


class WikilinksPlugin:
env: Optional[ENV] = None

@@ -223,7 +222,7 @@ def on_page_content(

if page.meta.get("list_wikilinks", False):
# Create a bullet list of links
wrapped_links = "<details class='quote'><summary>Wiki links on this page</summary><ul>"
wrapped_links = "<details class='quote'><summary>Relevant internal links on this page</summary><ul>"
unique_links = {
link["url"]: (link["path"], link["name"]) for link in links_number
}
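
Note: for context, a sketch of how the deduplicated links end up inside the <details> block whose summary text changes here; the links_number records and the <li> markup are illustrative assumptions, only the dict comprehension and the wrapper string come from the diff.

links_number = [  # hypothetical per-page wikilink records
    {"url": "/tutorial/", "path": "docs/tutorial.md", "name": "Tutorial"},
    {"url": "/tutorial/", "path": "docs/tutorial.md", "name": "Tutorial"},  # duplicate
    {"url": "/reference/", "path": "docs/reference.md", "name": "Reference"},
]
unique_links = {
    link["url"]: (link["path"], link["name"]) for link in links_number
}
items = "".join(
    f'<li><a href="{url}">{name}</a></li>' for url, (_path, name) in unique_links.items()
)
wrapped_links = (
    "<details class='quote'><summary>Relevant internal links on this page</summary><ul>"
    + items
    + "</ul></details>"
)
print(wrapped_links)
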
65 changes: 28 additions & 37 deletions mkdocs_juvix/main.py
@@ -9,7 +9,8 @@
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, TypeVar
from urllib.parse import urljoin

from rich.console import Console # type: ignore
from rich.markdown import Markdown # type: ignore
import pathspec
import questionary
import yaml # type:ignore
@@ -42,6 +43,8 @@
warnings.filterwarnings("ignore", category=DeprecationWarning)

load_dotenv()
console = Console()


# os.environ["DEBUG"] = "true"

@@ -328,7 +331,7 @@ def run_pipeline(self, save_markdown: bool = True, force: bool = False) -> None:
self.generate_isabelle_theories(
save_markdown=save_markdown, force=force
)
self.generate_images(save_markdown=save_markdown, force=force)
# self.generate_images(save_markdown=save_markdown, force=force)
self.replaces_wikilinks_by_markdown_links(
save_markdown=save_markdown, force=force
)
@@ -694,7 +697,7 @@ def _process_juvix_html(self, update_assets: bool = False) -> None:
if update_assets:
self._update_assets()
else:
log.info("HTML generation completed but not saved to disk.")
log.debug("HTML generation completed but not saved to disk.")
except subprocess.CalledProcessError as e:
self.save_error_message(e.stderr, "html")
except Exception as e:
@@ -1119,21 +1122,20 @@ def generate_images(
Modify the markdown output by adding the images. This requires the
preprocessing of Juvix and Isabelle to have occurred before.
"""
# if result := self.skip_and_use_cache_for_process(
# force=force,
# processed_tag="images",
# ):
# log.debug(
# f"> Skipping images generation for {Fore.GREEN}{self.relative_filepath}{Style.RESET_ALL} using cached output"
# )
# return result
if self._processed_images and not force and not self.changed_since_last_run():
log.debug(
f"> Skipping images generation for {Fore.GREEN}{self.relative_filepath}{Style.RESET_ALL} using cached output"
)
return None

log.info(f"{Fore.MAGENTA}Generating images for {self.relative_filepath}{Style.RESET_ALL}")
_output = None
_markdown_output = self.cache_filepath.read_text()
metadata = parse_front_matter(_markdown_output) or {}
preprocess = metadata.get("preprocess", {})
needs_images = preprocess.get("images", True)
if needs_images and (not self._processed_images or force):
log.debug(f"Needs images: {needs_images}")
if needs_images:
_output = process_images(
self.env,
_markdown_output,
@@ -1392,22 +1394,23 @@ async def process_original_markdowns():
pbar.update(1)
clear_line()

# clear_line()
# if generate_images:
# with tqdm(total=len(files_to_process), desc="> processing images") as pbar:
# for file in files_to_process:
# file.generate_images()
# current_file = file.relative_filepath
# pbar.set_postfix_str(
# f"{Fore.MAGENTA}{current_file}{Style.RESET_ALL}"
# )
# pbar.update(1)
clear_line()
if generate_images:
with sync_tqdm(
total=len(files_to_process), desc="> processing images"
) as pbar:
for file in files_to_process:
file.generate_images()
current_file = file.relative_filepath
pbar.set_postfix_str(
f"{Fore.MAGENTA}{current_file}{Style.RESET_ALL}"
)
pbar.update(1)

if generate_wikilinks:

@time_spent(message="> processing wikilinks")
async def process_wikilinks():
# if mkdocs
flist = (
files_to_process
if not self.force_wikilinks_generation
@@ -1556,9 +1559,6 @@ class JuvixPlugin(BasePlugin):
enhanced_collection: EnhancedMarkdownCollection
wikilinks_plugin: WikilinksPlugin
first_run: bool = True
response: Optional[str] = None
use_juvix_question: Optional[questionary.Question] = None


def on_startup(self, *, command: str, dirty: bool) -> None:
clear_screen()
@@ -1569,17 +1569,8 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig:

self.env.SITE_DIR = config.get("site_dir", getenv("SITE_DIR", None))

if not os.environ.get("CI") or os.getenv("NO_INTERACTION"):
self.use_juvix_question = questionary.select(
"Do you want to process Juvix Markdown files (this will take longer)?",
choices=["yes", "no", "always", "never"],
default="no",
)
self.response = self.use_juvix_question.ask()
if self.response == "never":
self.env.JUVIX_ENABLED = False
elif self.response == "always":
self.env.JUVIX_ENABLED = True
if os.getenv("SKIP_JUVIX"):
self.env.JUVIX_ENABLED = False

if self.env.JUVIX_ENABLED and not self.env.JUVIX_AVAILABLE:
log.error(