Skip to content

Commit

Permalink
Refactor BuildInfo and move to a new module (#12768)
Browse files Browse the repository at this point in the history
  • Loading branch information
AA-Turner authored Aug 11, 2024
1 parent 9d3087c commit a3f1383
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 84 deletions.
3 changes: 2 additions & 1 deletion sphinx/builders/_epub_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from docutils.utils import smartquotes

from sphinx import addnodes
from sphinx.builders.html import BuildInfo, StandaloneHTMLBuilder
from sphinx.builders.html import StandaloneHTMLBuilder
from sphinx.builders.html._build_info import BuildInfo
from sphinx.locale import __
from sphinx.util import logging
from sphinx.util.display import status_iterator
Expand Down
105 changes: 22 additions & 83 deletions sphinx/builders/html/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,16 @@
from __future__ import annotations

import contextlib
import hashlib
import html
import os
import posixpath
import re
import shutil
import sys
import types
import warnings
from os import path
from pathlib import Path
from typing import IO, TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any
from urllib.parse import quote

import docutils.readers.doctree
Expand All @@ -31,6 +30,7 @@
_file_checksum,
_JavaScript,
)
from sphinx.builders.html._build_info import BuildInfo
from sphinx.config import ENUM, Config
from sphinx.deprecation import _deprecation_warning
from sphinx.domains import Domain, Index, IndexEntry
Expand Down Expand Up @@ -63,16 +63,14 @@
from sphinx.writers.html5 import HTML5Translator

if TYPE_CHECKING:
from collections.abc import Iterable, Iterator, Set
from collections.abc import Iterable, Iterator
from typing import TypeAlias

from docutils.nodes import Node
from docutils.readers import Reader

from sphinx.application import Sphinx
from sphinx.config import _ConfigRebuild
from sphinx.environment import BuildEnvironment
from sphinx.util.tags import Tags
from sphinx.util.typing import ExtensionMetadata

#: the filename for the inventory of objects
Expand All @@ -93,23 +91,6 @@
]


def _stable_hash(obj: Any) -> str:
    """Return an order-independent MD5 hex digest for a Python data structure.

    Hashing ``str(obj)`` directly is not enough, because the iteration order
    of collections may vary; members are therefore hashed individually and
    the digests are sorted before the final hash is taken.
    """
    if isinstance(obj, dict):
        # Each (key, value) pair is hashed as a tuple.  The resulting list
        # then also goes through the sequence branch below.
        obj = sorted(_stable_hash(pair) for pair in obj.items())

    if isinstance(obj, (list, tuple, set, frozenset)):
        member_digests = [_stable_hash(member) for member in obj]
        member_digests.sort()
        text = str(member_digests)
    elif isinstance(obj, (type, types.FunctionType)):
        # The default repr() of classes and functions is not used here
        # (for functions it embeds the object's id); the fully qualified
        # name is hashed instead.
        text = f'{obj.__module__}.{obj.__qualname__}'
    else:
        text = str(obj)

    return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()


def convert_locale_to_language_tag(locale: str | None) -> str | None:
"""Convert a locale string to a language tag (ex. en_US -> en-US).

Expand All @@ -121,57 +102,6 @@ def convert_locale_to_language_tag(locale: str | None) -> str | None:
return None


class BuildInfo:
    """buildinfo file manipulator.

    HTMLBuilder and its family are storing their own envdata to ``.buildinfo``.
    This class is a manipulator for the file: it holds one hash of the
    selected configuration values and one hash of the tags, and knows how to
    read them from and write them to an open text file.
    """

    @classmethod
    def load(cls: type[BuildInfo], f: IO[str]) -> BuildInfo:
        """Parse build info from the open text file *f*.

        :raises ValueError: if the content cannot be parsed; the underlying
            error is attached as the cause.
        """
        try:
            lines = f.readlines()
            # Explicit checks rather than ``assert``: assertions are removed
            # when Python runs with ``-O``, which would disable validation.
            if lines[0].rstrip() != '# Sphinx build info version 1':
                raise ValueError('unknown version')
            if not lines[2].startswith('config: '):
                raise ValueError('missing config entry')
            if not lines[3].startswith('tags: '):
                raise ValueError('missing tags entry')

            build_info = BuildInfo()
            # ``removeprefix`` (instead of ``split()[1]``) also accepts the
            # empty hashes that ``dump()`` writes for a default instance.
            build_info.config_hash = lines[2].removeprefix('config: ').strip()
            build_info.tags_hash = lines[3].removeprefix('tags: ').strip()
            return build_info
        except Exception as exc:
            # Too-short files (IndexError) and failed checks all surface as
            # a single ValueError.
            raise ValueError(__('build info file is broken: %r') % exc) from exc

    def __init__(
        self,
        config: Config | None = None,
        tags: Tags | None = None,
        config_categories: Set[_ConfigRebuild] = frozenset(),
    ) -> None:
        """Compute the hashes for *config* and *tags*.

        Either hash stays ``''`` when the corresponding argument is falsy.
        Only the values returned by ``config.filter(config_categories)``
        contribute to the configuration hash.
        """
        self.config_hash = ''
        self.tags_hash = ''

        if config:
            values = {c.name: c.value for c in config.filter(config_categories)}
            self.config_hash = _stable_hash(values)

        if tags:
            self.tags_hash = _stable_hash(sorted(tags))

    def __eq__(self, other: object) -> bool:
        """Two build infos are equal when both of their hashes match."""
        if not isinstance(other, BuildInfo):
            # Let Python fall back to its default comparison instead of
            # failing with AttributeError on unrelated objects.
            return NotImplemented
        return (self.config_hash == other.config_hash and
                self.tags_hash == other.tags_hash)

    def dump(self, f: IO[str]) -> None:
        """Write the build info to the open text file *f*."""
        f.write('# Sphinx build info version 1\n'
                '# This file hashes the configuration used when building these files.'
                ' When it is not found, a full rebuild will be done.\n'
                'config: %s\n'
                'tags: %s\n' %
                (self.config_hash, self.tags_hash))


class StandaloneHTMLBuilder(Builder):
"""
Builds standalone HTML docs.
Expand Down Expand Up @@ -396,18 +326,28 @@ def math_renderer_name(self) -> str | None:
def get_outdated_docs(self) -> Iterator[str]:
build_info_fname = self.outdir / '.buildinfo'
try:
with open(build_info_fname, encoding="utf-8") as fp:
buildinfo = BuildInfo.load(fp)

if self.build_info != buildinfo:
logger.debug('[build target] did not match: build_info ')
yield from self.env.found_docs
return
build_info = BuildInfo.load(build_info_fname)
except ValueError as exc:
logger.warning(__('Failed to read build info file: %r'), exc)
except OSError:
# ignore errors on reading
pass
else:
if self.build_info != build_info:
# log the mismatch and backup the old build info
build_info_backup = build_info_fname.with_name('.buildinfo.bak')
try:
shutil.move(build_info_fname, build_info_backup)
self.build_info.dump(build_info_fname)
except OSError:
pass # ignore errors
else:
# only log on success
msg = __('build_info mismatch, copying .buildinfo to .buildinfo.bak')
logger.info(bold(__('building [html]: ')) + msg)

yield from self.env.found_docs
return

if self.templates:
template_mtime = int(self.templates.newest_template_mtime() * 10**6)
Expand Down Expand Up @@ -943,8 +883,7 @@ def copy_extra_files(self) -> None:

def write_buildinfo(self) -> None:
try:
with open(path.join(self.outdir, '.buildinfo'), 'w', encoding="utf-8") as fp:
self.build_info.dump(fp)
self.build_info.dump(self.outdir / '.buildinfo')
except OSError as exc:
logger.warning(__('Failed to write build info file: %r'), exc)

Expand Down
94 changes: 94 additions & 0 deletions sphinx/builders/html/_build_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""Record metadata for the build process."""

from __future__ import annotations

import hashlib
import types
from typing import TYPE_CHECKING

from sphinx.locale import __

if TYPE_CHECKING:
from collections.abc import Set
from pathlib import Path
from typing import Any

from sphinx.config import Config, _ConfigRebuild
from sphinx.util.tags import Tags


class BuildInfo:
    """buildinfo file manipulator.

    HTMLBuilder and its family are storing their own envdata to ``.buildinfo``.
    This class is a manipulator for the file: it holds one hash of the
    selected configuration values and one hash of the tags, and knows how to
    read them from and write them to a path.
    """

    @classmethod
    def load(cls: type[BuildInfo], filename: Path, /) -> BuildInfo:
        """Read *filename* and return the build info recorded in it.

        The expected layout is four lines: the version header, a comment,
        a ``config: <hash>`` entry and a ``tags: <hash>`` entry.

        :raises OSError: if the file cannot be read.
        :raises ValueError: if the file is empty, truncated, has an
            unexpected version header, or lacks one of the entries.
        """
        content = filename.read_text(encoding="utf-8")
        lines = content.splitlines()

        # Every index below is guarded by a length check so that an empty or
        # truncated file is reported as ValueError, like any other malformed
        # file, instead of escaping as IndexError.
        if not lines or lines[0].rstrip() != '# Sphinx build info version 1':
            msg = __('failed to read broken build info file (unknown version)')
            raise ValueError(msg)

        if len(lines) < 3 or not lines[2].startswith('config: '):
            msg = __('failed to read broken build info file (missing config entry)')
            raise ValueError(msg)
        if len(lines) < 4 or not lines[3].startswith('tags: '):
            msg = __('failed to read broken build info file (missing tags entry)')
            raise ValueError(msg)

        build_info = BuildInfo()
        build_info.config_hash = lines[2].removeprefix('config: ').strip()
        build_info.tags_hash = lines[3].removeprefix('tags: ').strip()
        return build_info

    def __init__(
        self,
        config: Config | None = None,
        tags: Tags | None = None,
        config_categories: Set[_ConfigRebuild] = frozenset(),
    ) -> None:
        """Compute the hashes for *config* and *tags*.

        Either hash stays ``''`` when the corresponding argument is falsy.
        Only the values returned by ``config.filter(config_categories)``
        contribute to the configuration hash.
        """
        self.config_hash = ''
        self.tags_hash = ''

        if config:
            values = {c.name: c.value for c in config.filter(config_categories)}
            self.config_hash = _stable_hash(values)

        if tags:
            self.tags_hash = _stable_hash(sorted(tags))

    def __eq__(self, other: object) -> bool:
        """Two build infos are equal when both of their hashes match."""
        if not isinstance(other, BuildInfo):
            # Let Python fall back to its default comparison instead of
            # failing with AttributeError on unrelated objects.
            return NotImplemented
        return (self.config_hash == other.config_hash and
                self.tags_hash == other.tags_hash)

    def dump(self, filename: Path, /) -> None:
        """Write the build info to *filename*, replacing any existing content.

        :raises OSError: if the file cannot be written.
        """
        build_info = (
            '# Sphinx build info version 1\n'
            '# This file records the configuration used when building these files. '
            'When it is not found, a full rebuild will be done.\n'
            f'config: {self.config_hash}\n'
            f'tags: {self.tags_hash}\n'
        )
        filename.write_text(build_info, encoding="utf-8")


def _stable_hash(obj: Any) -> str:
    """Return an order-independent MD5 hex digest for a Python data structure.

    Hashing ``str(obj)`` directly is not enough, because the iteration order
    of collections may vary; members are therefore hashed individually and
    the digests are sorted before the final hash is taken.
    """
    if isinstance(obj, dict):
        # Each (key, value) pair is hashed as a tuple.  The resulting list
        # then also goes through the sequence branch below.
        obj = sorted(_stable_hash(pair) for pair in obj.items())

    if isinstance(obj, (list, tuple, set, frozenset)):
        member_digests = [_stable_hash(member) for member in obj]
        member_digests.sort()
        text = str(member_digests)
    elif isinstance(obj, (type, types.FunctionType)):
        # The default repr() of classes and functions is not used here
        # (for functions it embeds the object's id); the fully qualified
        # name is hashed instead.
        text = f'{obj.__module__}.{obj.__qualname__}'
    else:
        text = str(obj)

    return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()

0 comments on commit a3f1383

Please sign in to comment.