From 9d3770ced9c4b353c83ed46b0ada7bee68daca2b Mon Sep 17 00:00:00 2001 From: Rongrong Date: Sun, 19 Jun 2022 08:12:08 +0800 Subject: [PATCH 1/3] Add CI to build SVGs with images embedded --- .github/workflows/dist.yml | 40 ++++++++++++++++++++++++ .gitignore | 2 ++ scripts/embed_images.py | 64 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 .github/workflows/dist.yml create mode 100644 .gitignore create mode 100644 scripts/embed_images.py diff --git a/.github/workflows/dist.yml b/.github/workflows/dist.yml new file mode 100644 index 0000000..a11eb3b --- /dev/null +++ b/.github/workflows/dist.yml @@ -0,0 +1,40 @@ +name: Update dist + +on: + push: + branches: + - main + paths: + - 'images/experiments/**' + - '.github/workflows/dist.yml' + - 'scripts/embed_images.py' + schedule: + - cron: "30 2 * * *" + workflow_dispatch: ~ + +jobs: + update: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Update + run: python -u ./scripts/embed_images.py + + - name: Get current time + run: echo "curr_time=$(date -uIs)" >> $GITHUB_ENV + + - name: Push + uses: JamesIves/github-pages-deploy-action@v4 + with: + branch: dist + folder: dist + git-config-name: github-actions[bot] + git-config-email: 41898282+github-actions[bot]@users.noreply.github.com + commit-message: ${{ env.curr_time }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5444d11 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ +dist/ diff --git a/scripts/embed_images.py b/scripts/embed_images.py new file mode 100644 index 0000000..99cfa40 --- /dev/null +++ b/scripts/embed_images.py @@ -0,0 +1,64 @@ +import re +import base64 +from urllib import request +from http.client import HTTPResponse +from pathlib import Path +from functools import lru_cache +from typing import Optional + +matcher = re.compile(r'(?<=xlink:href=")([^"]+)(?=")') + + +@lru_cache(maxsize=64) +def fetch(url: str) -> tuple[bytes, Optional[str]]: + print(f'Fetching {url}') + response: HTTPResponse + for _ in range(3): + try: + with request.urlopen(url, timeout=10) as response: + if response.status != 200: + raise Exception(f'{response.status} {response.reason}') + content_type = response.getheader('Content-Type') + print(f'Fetched {url} with content type {content_type}') + return response.read(), content_type + except Exception as e: + print(f'Failed to fetch {url}: {e}') + if _ == 2: + raise e + + +def replacer(match: re.Match) -> str: + url = match.group(1) + data, content_type = fetch(url) + data = base64.b64encode(data).decode() + content_type = content_type or 'image/png' + return f'data:{content_type};base64,{data}' + + +def embed_images(svg: str) -> str: + return matcher.sub(replacer, svg) + + +def main(): + base_path = Path(__file__).parent.parent + dist_path = base_path / 'dist' + source_path = base_path / 'images' / 'experiments' + + dist_path.mkdir(parents=True, exist_ok=True) + + with (dist_path / 'README.md').open('w') as readme: + readme.write('# Dist\n\n') + + for file in sorted(source_path.glob('*.svg')): + dest = dist_path / file.name + print(f'Processing {file}') + svg = file.read_text() + svg = embed_images(svg) + print(f'Writing to {dest}') + dest.write_text(svg) + readme.write(f'## [{file.name}]({file.name})\n\n' + f'[![{file.name}]({file.name})]({file.name})\n\n') + + +if __name__ == '__main__': + main() From f10f7e409edc0c54df47d76cb59a3fdca0290812 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Sun, 19 Jun 2022 21:00:51 +0800 Subject: [PATCH 2/3] Fix rough matcher --- scripts/embed_images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/embed_images.py b/scripts/embed_images.py index 99cfa40..6ba306c 100644 --- a/scripts/embed_images.py +++ b/scripts/embed_images.py @@ -6,7 +6,7 @@ from functools import lru_cache from typing import Optional -matcher = re.compile(r'(?<=xlink:href=")([^"]+)(?=")') +matcher = re.compile(r'(?<=xlink:href=")(https?://[^"]+)(?=")') @lru_cache(maxsize=64) From b65d6f1947cc28e24bd0afb5cb573f41a7435b83 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Sun, 19 Jun 2022 21:16:00 +0800 Subject: [PATCH 3/3] Refactor --- scripts/embed_images.py | 76 +++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/scripts/embed_images.py b/scripts/embed_images.py index 6ba306c..8ec8818 100644 --- a/scripts/embed_images.py +++ b/scripts/embed_images.py @@ -4,39 +4,49 @@ from http.client import HTTPResponse from pathlib import Path from functools import lru_cache -from typing import Optional +from typing import Optional, NoReturn -matcher = re.compile(r'(?<=xlink:href=")(https?://[^"]+)(?=")') +class Embedder: + _matcher = re.compile(r'(?<=xlink:href=")(https?://[^"]+)(?=")') -@lru_cache(maxsize=64) -def fetch(url: str) -> tuple[bytes, Optional[str]]: - print(f'Fetching {url}') - response: HTTPResponse - for _ in range(3): - try: - with request.urlopen(url, timeout=10) as response: - if response.status != 200: - raise Exception(f'{response.status} {response.reason}') - content_type = response.getheader('Content-Type') - print(f'Fetched {url} with content type {content_type}') - return response.read(), content_type - except Exception as e: - print(f'Failed to fetch {url}: {e}') - if _ == 2: - raise e + def __init__(self, matcher: re.Pattern = None): + self._matcher = matcher or self._matcher + @staticmethod + @lru_cache(maxsize=64) + def _fetch_remote(url: str) -> tuple[bytes, Optional[str]]: + print(f'Fetching {url}') + response: HTTPResponse + for _ in range(3): + try: + with request.urlopen(url, timeout=10) as response: + if response.status != 200: + raise Exception(f'{response.status} {response.reason}') + content_type = response.getheader('Content-Type') + print(f'Fetched {url} with content type {content_type}') + return response.read(), content_type + except Exception as e: + print(f'Failed to fetch {url}: {e}') + if _ == 2: + raise e -def replacer(match: re.Match) -> str: - url = match.group(1) - data, content_type = fetch(url) - data = base64.b64encode(data).decode() - content_type = content_type or 'image/png' - return f'data:{content_type};base64,{data}' + def _replacer(self, match: re.Match) -> str: + url = match.group(1) + data, content_type = self._fetch_remote(url) + data = base64.b64encode(data).decode() + content_type = content_type or 'image/png' + return f'data:{content_type};base64,{data}' + def embed(self, svg: str) -> str: + return self._matcher.sub(self._replacer, svg) -def embed_images(svg: str) -> str: - return matcher.sub(replacer, svg) + def __call__(self, source: Path, destination: Path) -> NoReturn: + print(f'Processing {source}') + svg = source.read_text() + svg = self.embed(svg) + print(f'Writing to {destination}') + destination.write_text(svg) def main(): @@ -46,18 +56,18 @@ def main(): dist_path.mkdir(parents=True, exist_ok=True) + embeder = Embedder() + with (dist_path / 'README.md').open('w') as readme: readme.write('# Dist\n\n') for file in sorted(source_path.glob('*.svg')): dest = dist_path / file.name - print(f'Processing {file}') - svg = file.read_text() - svg = embed_images(svg) - print(f'Writing to {dest}') - dest.write_text(svg) - readme.write(f'## [{file.name}]({file.name})\n\n' - f'[![{file.name}]({file.name})]({file.name})\n\n') + embeder(file, dest) + readme.write( + f'## [{file.name}]({file.name})\n\n' + f'[![{file.name}]({file.name})]({file.name})\n\n' + ) if __name__ == '__main__':