Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pipeline to build cookbooks #292

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions .github/scripts/authors.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
[email protected]:
name: Alex Barbet
img: bf2c763-Alex.jpg

[email protected]:
name: Neeral Beladia
img: a3689fe-Neeral.jpg

[email protected]:
name: Harri Bell-Thomas
img: 468e687-Harri.jpg

[email protected]:
name: Ania Bialas
img: c5dc5a3-Ania.jpg

[email protected]:
name: Giannis Chatziveroglou
img: 73153cb-giannis.jpeg

[email protected]:
name: William Darling
img: 7ee933d-William_Darling.jpg

[email protected]:
name: Antoine Debugne
img: edbe0b1-Antoine.jpg

[email protected]:
name: Marco Del Tredici
img: f103c96-Marco.jpg

[email protected]:
name: Shaan Desai
img: d17fc44-Shaan.jpg

# NOTE(review): the img below is identical to the one used for Kyle Duffy
# (648e4de-Kyle.jpg) — looks like a copy-paste placeholder; confirm the
# correct image file for Joan Devassy.
[email protected]:
name: Joan Devassy
img: 648e4de-Kyle.jpg

[email protected]:
name: Kyle Duffy
img: 648e4de-Kyle.jpg

[email protected]:
name: Abel Essiane
img: 7ad72f4-Abel.jpg

[email protected]:
name: Boyu Fan
img: 66b2d90-boyufan.JPG.jpg

[email protected]:
name: Utsav Garg
img: 9688517-Utsav.jpg

[email protected]:
name: Jason Jung
img: 0803e3d-Jason_Jung.jpg

[email protected]:
name: Trushant Kalyanpur
img: de428e0-Trushant.jpg

[email protected]:
name: Gokce Keskin
img: fe496a9-Gokce.jpg

[email protected]:
name: Olivia Lasche
img: 7419032-Olivia.jpg

[email protected]:
name: Justin Lee
img: 3678fac-Justin.jpg

[email protected]:
name: Kate Lubrano
img: 1131093-Kate.jpg

[email protected]:
name: Lacey Mclear
img: 88c1977-Lacey.jpg

[email protected]:
name: Vivek Mupalla
img: 3666c64-Vivek.jpg

[email protected]:
name: Aal Patankar
img: d48e622-Aal.jpg

[email protected]:
name: Mauro Schilman
img: 16c3bde-Mauro.jpg

[email protected]:
name: Sanal Shivaprasad
img: 811a185-Sanal.jpg

[email protected]:
name: Anirudh Shrinivason
img: e257c29-Anirudh.jpg

[email protected]:
name: Vanessa Tang
img: f6a8d8e-Vanessa.jpg

[email protected]:
name: Jennifer Tracey
img: 4579ada-Jennifer.jpg

[email protected]:
name: Evren Tumer
img: d69301f-Evren.jpg
125 changes: 125 additions & 0 deletions .github/scripts/build_cookbooks.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's move this to the scripts folder.

Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import yaml
import nbformat
from pathlib import Path
from nbconvert import MarkdownExporter
from jinja2 import Environment, FileSystemLoader
import frontmatter
import re
from base64 import b64encode


# Filesystem layout; every path is derived from the directory holding this script.
BASE_PATH = Path(__file__).resolve().parent
NOTEBOOKS_REPO_PATH = BASE_PATH / "notebooks"  # root that registry "path" values are joined onto
COOKBOOKS_PATH = BASE_PATH / "../../fern/pages/cookbooks"  # destination of generated .mdx pages
REGISTRY_FILE = BASE_PATH / "registry.yaml"
AUTHORS_FILE = BASE_PATH / "authors.yaml"
TEMPLATES_PATH = BASE_PATH / "templates"

# Markdown image syntax ``![alt](src)``; group 1 is the alt text, group 2 the source.
MARKDOWN_IMAGE_IMPORT_PATTERN = re.compile(r"!\[(.*?)\]\((.*?)\)")
# A level-1 Markdown heading line ("# ...") including its trailing newline.
# MULTILINE is set once via the flag; the previous inline "(?m)" duplicated it.
TITLE_PATTERN = re.compile(r"^#\s.*\n", re.MULTILINE)
# A whole <script>...</script> element, possibly spanning several lines.
# HTML tag names are case-insensitive, so <SCRIPT> must be matched as well.
SCRIPT_PATTERN = re.compile(r"<script.*?</script>", re.DOTALL | re.IGNORECASE)


def _format_bytes_as_base64(data: bytes) -> str:
return b64encode(data).decode("utf-8")


# Jinja environment rooted at the templates directory. The cookbook page
# template is looked up once, when this module is loaded.
env = Environment(loader=FileSystemLoader(searchpath=TEMPLATES_PATH))
template = env.get_template("cookbook.md")

def _post_process(body: str, resources: dict) -> str:
    """Transform exporter-generated Markdown into the form written to a cookbook page.

    Steps, in order:
      1. remove the first level-1 heading line;
      2. remove every ``<script>...</script>`` element;
      3. replace each ``*.png`` output filename with a ``data:image/png;base64,``
         URI built from the bytes stored under that name in ``resources["outputs"]``;
      4. rewrite Markdown image syntax ``![alt](src)`` as an ``<img>`` tag.

    Args:
        body: Markdown text produced by the exporter.
        resources: Exporter resources. Its ``"outputs"`` entry, when present,
            maps output filenames to their raw bytes.

    Returns:
        The transformed Markdown/HTML text.
    """
    body = TITLE_PATTERN.sub("", body, count=1)
    body = SCRIPT_PATTERN.sub("", body)

    # A notebook without extracted outputs may have no "outputs" entry at all.
    outputs = resources.get("outputs") or {}

    # Inline PNG outputs: every mention of the filename becomes a data URI.
    for filename, data in outputs.items():
        if filename.endswith(".png"):
            data_uri = f"data:image/png;base64,{_format_bytes_as_base64(data)}"
            body = body.replace(filename, data_uri)

    def _as_img_tag(match):
        alt, src = match.groups()
        # Keep a literal double quote from breaking out of the attribute value.
        alt = alt.replace('"', "&quot;")
        # Emit exactly one alt attribute; a duplicated attribute is invalid
        # markup and hides the real alt text.
        return f'<img src="{src}" alt="{alt}"/>'

    return MARKDOWN_IMAGE_IMPORT_PATTERN.sub(_as_img_tag, body)

def build_cookbooks():
    """Render every published notebook listed in the registry to an ``.mdx`` page.

    For each registry entry whose ``publish`` flag is not false, the notebook at
    ``NOTEBOOKS_REPO_PATH / entry["path"]`` is converted to Markdown,
    post-processed, rendered through the module-level ``template`` and written
    to ``COOKBOOKS_PATH / "<slug>.mdx"``. Title, description, image and keywords
    come from the registry entry, falling back to the front matter of an
    already-existing output file.

    Raises:
        KeyError: if a published entry lacks ``path`` or ``slug``.
        OSError: if the registry, the authors file or a notebook cannot be read.
    """
    # All text files are read as UTF-8 explicitly so the result does not depend
    # on the platform's default encoding.
    with open(REGISTRY_FILE, "r", encoding="utf-8") as file:
        # An empty YAML document loads as None; treat it as "no entries".
        registry = yaml.safe_load(file) or []

    # Open author metadata DB
    with open(AUTHORS_FILE, "r", encoding="utf-8") as f:
        _authors: dict[str, dict] = yaml.safe_load(f) or {}

    md_exporter = MarkdownExporter()

    for entry in registry:
        if not entry.get("publish", True):
            continue

        # "authors:" may be missing or explicitly null in the registry.
        author_email_list = entry.get("authors") or []
        unknown_emails = [email for email in author_email_list if email not in _authors]
        if unknown_emails:
            # Unknown authors are still skipped, but no longer silently.
            print(
                f"Warning: no author metadata for {unknown_emails} "
                f"in entry {entry.get('path', '')!r}"
            )
        authors = [
            {
                "name": _authors[email]["name"],
                "img": _authors[email]["img"],
                "email": email,
            }
            for email in author_email_list
            if email in _authors
        ]

        notebook_path = NOTEBOOKS_REPO_PATH / entry["path"]
        with open(notebook_path, "r", encoding="utf-8") as f:
            notebook_content = nbformat.read(f, as_version=4)
        body, resources = md_exporter.from_notebook_node(notebook_content)

        # Post-process content
        body = _post_process(body, resources)

        slug = f"/page/{entry['slug']}"
        output_file_path = COOKBOOKS_PATH / f"{entry['slug']}.mdx"

        # Front matter of a previously generated page supplies defaults for
        # fields the registry entry does not set.
        if output_file_path.exists():
            with open(output_file_path, "r", encoding="utf-8") as file:
                post = frontmatter.load(file)
            existing_metadata = post.metadata
        else:
            existing_metadata = {}

        # Prepare data for rendering the template
        body_data = {
            "title": entry.get("title", existing_metadata.get("title", "Default Title")),
            "slug": slug,
            "description": entry.get("description", existing_metadata.get("description", "")),
            "image": entry.get("image", existing_metadata.get("image", "")),
            "keywords": entry.get("keywords", existing_metadata.get("keywords", "")),
            "body": body,
            "authors": authors,
            "cookbook_path": entry.get("path", ""),
        }

        # Render the template with the data
        content = template.render(body_data)

        # Ensure the directory exists
        output_file_path.parent.mkdir(parents=True, exist_ok=True)

        # Write to file
        with open(output_file_path, "w", encoding="utf-8") as file:
            file.write(content)
        print(f"Updated or Created {output_file_path}")

# Script entry point: build all cookbooks when run directly, not on import.
if __name__ == "__main__":
    build_cookbooks()
Loading
Loading