Skip to content

Commit

Permalink
feat: aggregating pages together
Browse files Browse the repository at this point in the history
- Fixed page numbers not being coherent
- Improved concurrency and performance
  • Loading branch information
adrienbrignon committed Jun 7, 2024
1 parent f08ddff commit bdc6c75
Show file tree
Hide file tree
Showing 17 changed files with 185 additions and 76 deletions.
6 changes: 6 additions & 0 deletions docs/assets/stylesheets/custom.css
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
}
}

@page {
@bottom-center {
content: 'Page ' counter(page) ' of ' counter(pages);
}
}

.md-icon-spin {
animation-name: spin;
animation-duration: 3s;
Expand Down
2 changes: 2 additions & 0 deletions docs/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ hide:
- navigation
---

<div class="mkdocs-exporter" style="display: none;"></div>

# Getting started

## Introduction
Expand Down
2 changes: 2 additions & 0 deletions docs/reference/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
::: mkdocs_exporter.config.ButtonConfig

::: mkdocs_exporter.config.FormatsConfig

::: mkdocs_exporter.config.LoggingConfig
4 changes: 2 additions & 2 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ plugins:
show_source: true
show_labels: false
show_root_heading: true
- privacy:
log_level: error
- search:
lang: en
- awesome-pages
Expand All @@ -77,6 +75,8 @@ plugins:
cards_layout_options:
background_color: '#EA2027'
- exporter:
logging:
level: debug
formats:
pdf:
enabled: !ENV [MKDOCS_EXPORTER_PDF, true]
Expand Down
9 changes: 9 additions & 0 deletions mkdocs_exporter/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ class ButtonConfig(BaseConfig):
"""Some extra attributes to add to the button."""


class LoggingConfig(BaseConfig):
    """The logging configuration.

    Holds the single `level` option declared below.
    """

    # Name of the log level; validated against the listed choices and
    # defaulting to 'info' when the option is not provided.
    level = c.Choice(['debug', 'info', 'warning', 'error', 'critical'], default='info')


class Config(BaseConfig):
"""The plugin's configuration."""

Expand All @@ -39,3 +45,6 @@ class Config(BaseConfig):

buttons = c.ListOfItems(c.SubConfig(ButtonConfig))
"""The buttons to add."""

logging = c.SubConfig(LoggingConfig)
"""The logging configuration."""
45 changes: 26 additions & 19 deletions mkdocs_exporter/formats/pdf/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,53 @@

import os

from pypdf import PdfReader, PdfWriter
from pypdf import PdfWriter

from mkdocs_exporter.formats.pdf.renderer import Renderer
from mkdocs_exporter.formats.pdf.preprocessor import Preprocessor


class Aggregator:
"""Aggregates PDF documents together."""


def __init__(self, renderer: Renderer):
"""The constructor."""

self.total_pages = 0
self.renderer = renderer


def open(self, path: str) -> Aggregator:
    """Opens the aggregator.

    Remembers the output path and creates a fresh `PdfWriter` to collect
    appended documents.

    Args:
        path: The path stored on the instance as `self.path`.

    Returns:
        The aggregator itself, matching the declared return type and the
        chaining style already used by `append`.
    """

    self.path = path
    self.writer = PdfWriter()

    return self


def covers(self, mode: str) -> Aggregator:
"""Defines the way of handling cover pages."""
def increment_total_pages(self, total_pages: int) -> Aggregator:
"""Increments the total pages count."""

self._covers = mode
self.total_pages = self.total_pages + total_pages


def aggregate(self, pages: list) -> Aggregator:
"""Aggregates pages together."""
def preprocess(self, html: str, page_number: int = 1) -> str:
"""Preprocesses the page."""

for n, page in enumerate(pages):
if 'pdf' not in page.formats:
continue
preprocessor = Preprocessor()

bounds = None
pdf = page.formats['pdf']['path']
total = len(PdfReader(pdf).pages)
preprocessor.preprocess(html)
preprocessor.metadata({'page': page_number, 'pages': self.total_pages})

if 'covers' in page.formats['pdf']:
covers = page.formats['pdf']['covers']
return preprocessor.done()

if self._covers == 'none':
bounds = (1 if covers['front'] else 0, (total - 1) if covers['back'] else total)
if self._covers == 'limits':
bounds = (1 if n != 0 and covers['front'] else 0, (total - 1) if n != (len(pages) - 1) and covers['back'] else total)

self.writer.append(pdf, pages=bounds)
def append(self, document: str) -> Aggregator:
"""Appends a document to this one."""

self.writer.append(document)

return self


def save(self, metadata={}) -> Aggregator:
Expand Down
18 changes: 10 additions & 8 deletions mkdocs_exporter/formats/pdf/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,28 +84,30 @@ async def close(self) -> Browser:
return self


async def print(self, html: str) -> tuple[bytes, int]:
    """Prints some HTML to PDF and returns the PDF and the number of pages printed.

    The HTML is written to a temporary `.html` file, opened in a new page of
    `self.context`, and printed once `<body>` carries the
    `mkdocs-exporter="true"` attribute.

    Args:
        html: The HTML document to print.

    Returns:
        A `(pdf, pages)` tuple: the PDF bytes and the page count read from
        the `mkdocs-exporter-pages` attribute of `<body>` (0 when the
        attribute is missing or empty).
    """

    context = await self.context.new_page()

    try:
        # `delete=False` keeps the file on disk after it is closed so the
        # browser can load it; it is removed explicitly below.
        with NamedTemporaryFile(suffix='.html', mode='w+', encoding='utf-8', delete=False) as file:
            file.write(html)

        try:
            await context.goto('file://' + file.name, wait_until='networkidle')
            await context.locator('body[mkdocs-exporter="true"]').wait_for(timeout=self.timeout)

            pages = int(await context.locator('body').get_attribute('mkdocs-exporter-pages') or 0)
            pdf = await context.pdf(prefer_css_page_size=True, print_background=True, display_header_footer=False)
        finally:
            # Remove by path: passing the file wrapper itself raises a
            # TypeError, which previously got swallowed and left the
            # temporary file behind. Deletion stays best-effort.
            try:
                os.unlink(file.name)
            except OSError:
                pass
    finally:
        # Always release the browser page, even when loading or printing fails.
        await context.close()

    return (pdf, pages)


async def log(self, msg):
Expand Down
91 changes: 59 additions & 32 deletions mkdocs_exporter/formats/pdf/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
import types
import asyncio

from typing import Optional, Coroutine, Sequence
from typing import Optional

from mkdocs.plugins import BasePlugin
from mkdocs.plugins import event_priority
from mkdocs.livereload import LiveReloadServer
from mkdocs.plugins import BasePlugin, CombinedEvent

from mkdocs_exporter.page import Page
from mkdocs_exporter.helpers import concurrently
from mkdocs_exporter.logging import logger
from mkdocs_exporter.formats.pdf.config import Config
from mkdocs_exporter.formats.pdf.renderer import Renderer
Expand All @@ -25,7 +26,9 @@ def __init__(self):
self.watch: list[str] = []
self.renderer: Optional[Renderer] = None
self.tasks: list[types.CoroutineType] = []
self.aggregator: Optional[Aggregator] = None
self.loop: Optional[asyncio.AbstractEventLoopPolicy] = None
self.on_post_build = CombinedEvent(self._on_post_build_1, self._on_post_build_2, self._on_post_build_3)


def on_config(self, config: dict) -> None:
Expand Down Expand Up @@ -84,16 +87,16 @@ def on_page_markdown(self, markdown: str, page: Page, config: Config, **kwargs)
def on_pre_build(self, **kwargs) -> None:
"""Invoked before the build process starts."""

self.tasks.clear()

if not self._enabled():
return

self.loop = asyncio.new_event_loop()
self.renderer = Renderer(options=self.config)

asyncio.set_event_loop(self.loop)
if self.config.aggregator.get('enabled'):
self.aggregator = Aggregator(renderer=self.renderer)

self.renderer = Renderer(options=self.config)
asyncio.set_event_loop(self.loop)

for stylesheet in self.config.stylesheets:
self.renderer.add_stylesheet(stylesheet)
Expand All @@ -107,8 +110,6 @@ def on_pre_page(self, page: Page, config: dict, **kwargs):

if not self._enabled():
return
if not hasattr(page, 'html'):
raise Exception('Missing `exporter` plugin or your plugins are not ordered properly!')

directory = os.path.dirname(page.file.abs_dest_path)
filename = os.path.splitext(os.path.basename(page.file.abs_dest_path))[0] + '.pdf'
Expand All @@ -127,58 +128,85 @@ def on_post_page(self, html: str, page: Page, **kwargs) -> Optional[str]:
if not self._enabled(page) and 'pdf' in page.formats:
del page.formats['pdf']

page.html = html

if 'pdf' in page.formats:
async def render(page: Page) -> None:
logger.info("[mkdocs-exporter.pdf] Rendering '%s'...", page.file.src_path)

html = self.renderer.preprocess(page)
pdf = await self.renderer.render(html)
pdf, pages = await self.renderer.render(html)

page.formats['pdf']['pages'] = pages

with open(page.formats['pdf']['path'], 'wb+') as file:
file.write(pdf)
logger.info("[mkdocs-exporter.pdf] File written to '%s'!", file.name)

if self.aggregator:
self.aggregator.increment_total_pages(pages)

self.tasks.append(render(page))

return page.html


@event_priority(-100)
def on_post_build(self, config: dict, **kwargs) -> None:
@event_priority(-90)
def _on_post_build_1(self, **kwargs) -> None:
"""Invoked after the build process."""

if not self._enabled():
return
while self.tasks:
self.loop.run_until_complete(asyncio.gather(*concurrently(self.tasks, max(1, self.config.concurrency or 1))))


@event_priority(-95)
def _on_post_build_2(self, config: dict, **kwargs) -> None:
"""Invoked after the build process."""

if not self._enabled() or not self.aggregator:
return

def concurrently(coroutines: Sequence[Coroutine], concurrency: int) -> Sequence[Coroutine]:
semaphore = asyncio.Semaphore(concurrency)
output = self.config['aggregator']['output']
self.pages = [page for page in self.pages if 'pdf' in page.formats]

async def limit(coroutine: Coroutine) -> Coroutine:
async with semaphore:
return await asyncio.create_task(coroutine)
logger.info("[mkdocs-exporter.pdf] Aggregating %d pages from %d documents together as '%s'...", self.aggregator.total_pages, len(self.pages), output)

return [limit(coroutine) for coroutine in coroutines]
async def render(page: Page, page_number: int) -> None:
html = self.aggregator.preprocess(self.renderer.preprocess(page), page_number=page_number)
pdf, _ = await self.renderer.render(html)

with open(page.formats['pdf']['path'] + '.aggregate', 'wb+') as file:
file.write(pdf)

for n, page in enumerate(self.pages):
self.tasks.append(render(page, page_number=sum(page.formats['pdf']['pages'] for page in self.pages[:n])))
while self.tasks:
self.loop.run_until_complete(asyncio.gather(*concurrently(self.tasks, max(1, self.config.concurrency or 1))))

self.aggregator.open(os.path.join(config['site_dir'], output))

for page in self.pages:
self.aggregator.append(page.formats['pdf']['path'] + '.aggregate')
os.unlink(page.formats['pdf']['path'] + '.aggregate')

self.aggregator.save()


@event_priority(-100)
def _on_post_build_3(self, **kwargs) -> None:
"""Invoked after the build process."""

if not self._enabled():
return

self.loop.run_until_complete(asyncio.gather(*concurrently(self.tasks, max(1, self.config.concurrency or 1))))
self.loop.run_until_complete(self.renderer.dispose())
self.tasks.clear()

self.loop = None
self.pages = None
self.renderer = None
self.aggregator = None

asyncio.set_event_loop(self.loop)

if self.config.get('aggregator', {})['enabled']:
aggregator = Aggregator()
aggregator_config = self.config.get('aggregator', {})

aggregator.open(os.path.join(config['site_dir'], aggregator_config['output']))
aggregator.covers(aggregator_config['covers'])
aggregator.aggregate(self.pages)
aggregator.save(metadata=aggregator_config['metadata'])


@event_priority(-100)
def on_nav(self, nav, **kwargs):
Expand All @@ -195,7 +223,6 @@ def flatten(items):

return pages

self.nav = nav
self.pages = flatten(nav)


Expand Down
4 changes: 2 additions & 2 deletions mkdocs_exporter/formats/pdf/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from urllib.parse import unquote

from mkdocs_exporter.page import Page
from mkdocs_exporter.resources import js
from mkdocs_exporter.formats.pdf.resources import js
from mkdocs_exporter.formats.pdf.browser import Browser
from mkdocs_exporter.renderer import Renderer as BaseRenderer
from mkdocs_exporter.formats.pdf.preprocessor import Preprocessor
Expand Down Expand Up @@ -78,7 +78,7 @@ def preprocess(self, page: Page) -> str:
return preprocessor.done()


async def render(self, page: str | Page) -> bytes:
async def render(self, page: str | Page) -> tuple[bytes, int]:
"""Renders a page as a PDF document."""

if not self.browser.launched:
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ window.PagedConfig = {
* Invoked once all pages have been rendered.
*/
after: () => {
  // Overrides apply only when a global `__MKDOCS_EXPORTER__` exists on `window`
  // and Paged.js produced its `.pagedjs_pages` container.
  const container = '__MKDOCS_EXPORTER__' in window
    ? document.getElementsByClassName('pagedjs_pages')[0]
    : undefined;

  if (container) {
    const overrides = __MKDOCS_EXPORTER__;

    // Replace the total page count exposed through the CSS custom property.
    if ('pages' in overrides) {
      container.style.setProperty('--pagedjs-page-count', overrides.pages);
    }

    // Restart the `page` counter on the first child so numbering begins at the given value.
    if ('page' in overrides) {
      const first = container.children[0];

      if (first) {
        first.style.setProperty('counter-reset', `page ${overrides.page}`);
      }
    }
  }

  // Publish the rendered page count first, then set the marker attribute last.
  const { body } = document;

  body.setAttribute('mkdocs-exporter-pages', document.getElementsByClassName('pagedjs_page').length);
  body.setAttribute('mkdocs-exporter', 'true');
}

Expand Down
Loading

0 comments on commit bdc6c75

Please sign in to comment.