Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create pdf out of the documentation #930

Merged
merged 2 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .github/workflows/docs-pdf-generate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Builds one merged PDF of the documentation and publishes it as a
# workflow artifact.
name: Generate a PDF version of the docs

on:
  # Runs for pull requests that target main, and on manual dispatch.
  pull_request:
    branches:
      - main
  workflow_dispatch:

jobs:
  pdf:
    name: Generate PDF
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # Node.js is required for the website2pdf CLI installed from npm below.
      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: 18

      # Python runs scripts/generate_pdf_output.py.
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

      # website2pdf renders the pages; PyPDF2 is imported by the script to
      # merge the per-page PDFs.
      - name: Install dependencies
        run: |
          npm install -g website2pdf
          pip install PyPDF2

      # Invoked from the repository root; the script writes
      # localstack_docs.pdf into the current working directory.
      - name: Generate PDF
        run: python3 scripts/generate_pdf_output.py

      - name: Upload the PDF
        uses: actions/upload-artifact@v3
        with:
          name: localstack_docs.pdf
          path: ./localstack_docs.pdf
123 changes: 123 additions & 0 deletions scripts/generate_pdf_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import os
import re
import shutil
import subprocess
import time

import PyPDF2
from pdf_list import doc_list


def run_website2pdf():
    """
    Run the 'website2pdf' CLI against the LocalStack docs sitemap.

    The command's standard output and standard error are captured as text.
    When the command exits successfully its captured output is printed;
    when it exits with a non-zero status its captured error stream is
    printed instead. The failure is reported only, not re-raised.
    """
    sitemap_url = "https://docs.localstack.cloud/sitemap.xml"
    try:
        completed = subprocess.run(
            ["website2pdf", "--sitemap-url", sitemap_url],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as err:
        print("An error occurred while executing the command.")
        print(err.stderr)
    else:
        print("Command executed successfully. Output:")
        print(completed.stdout)


def find_and_copy_pdfs(source_dir, target_dir):
    """
    Copies every PDF found under source_dir into target_dir with a
    normalized file name.

    A name is normalized by removing the text " | Docs" and any
    " (...)" parenthetical, lowercasing it, and replacing spaces with "-"
    and "&" with "and". For example "Getting Started | Docs (v2).pdf"
    becomes "getting-started.pdf".

    If two source files normalize to the same name, the later one still
    overwrites the earlier one, but a warning is printed so the clash is
    visible.

    Parameters:
    source_dir (str): The directory tree to search for PDF files.
    target_dir (str): The directory where PDF files will be copied to.
        It is created if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(target_dir, exist_ok=True)

    # Compare normalized absolute paths: a raw string comparison misses
    # equivalent spellings of the target ("final/", "./final"), which would
    # make a re-run copy files in the target onto themselves.
    target_abs = os.path.abspath(target_dir)

    # Normalized name -> source path copied under that name in this call.
    copied_from = {}

    for root, _dirs, files in os.walk(source_dir):
        # Skip the target directory to prevent copying files onto themselves
        if os.path.abspath(root) == target_abs:
            continue

        for file in files:
            if not file.endswith(".pdf"):
                continue

            file_path = os.path.join(root, file)

            # Remove "| Docs" and parenthetical expressions, then apply the
            # remaining replacements.
            new_file_name = re.sub(r" \(.*?\)", "", file.replace(" | Docs", ""))
            new_file_name = (
                new_file_name.lower().replace(" ", "-").replace("&", "and")
            )

            if new_file_name in copied_from:
                print(
                    f"Warning: {file_path} overwrites "
                    f"{copied_from[new_file_name]} as {new_file_name}"
                )
            copied_from[new_file_name] = file_path

            # Copy the file to the target directory with the new name
            shutil.copy(file_path, os.path.join(target_dir, new_file_name))


def merge_pdfs(file_list, output_filename, source_dir="final"):
    """
    Merges multiple PDF files into a single PDF.

    Each entry of file_list is resolved to "<source_dir>/<entry>.pdf" and
    appended in list order, so the order of file_list is the page order of
    the output.

    Parameters:
    file_list (list): Base names (without the ".pdf" extension) of the PDFs
        to merge.
    output_filename (str): The filename for the merged PDF output.
    source_dir (str): Directory containing the input PDFs. Defaults to
        "final", the location this function previously hard-coded.

    Raises:
    FileNotFoundError: If an entry of file_list has no matching PDF in
        source_dir.
    """
    merger = PyPDF2.PdfMerger()
    try:
        for pdf_file in file_list:
            with open(os.path.join(source_dir, f"{pdf_file}.pdf"), "rb") as f:
                merger.append(f)

        with open(output_filename, "wb") as out_file:
            merger.write(out_file)
    finally:
        # Release the merger's buffers even when an input is missing or
        # the write fails.
        merger.close()


def delete_folders(folder_list):
    """
    Removes the given folders from the "w2pdf_output" directory.

    Each name is resolved relative to "w2pdf_output". A folder that does
    not exist is reported and skipped; an OSError raised while deleting is
    reported and the remaining folders are still processed.

    Parameters:
    folder_list (list): Folder names, relative to "w2pdf_output", to delete.
    """
    for name in folder_list:
        path = os.path.join("w2pdf_output", name)
        try:
            if not os.path.exists(path):
                print(f"Folder not found: {path}")
                continue
            shutil.rmtree(path)
        except OSError as err:
            print(f"Error deleting folder {path}: {err}")


# Script entry point: render the site to PDFs, then post-process them.
if __name__ == "__main__":
run_website2pdf()
# NOTE(review): run_website2pdf() uses subprocess.run(..., check=True), which
# returns only after the command has exited. The reason for this additional
# 60-second wait is not visible here - confirm it is still needed.
time.sleep(60)
HarshCasper marked this conversation as resolved.
Show resolved Hide resolved
# Site sections left out of the merged PDF. delete_folders() resolves each
# entry relative to "w2pdf_output", so nested paths such as
# "references/coverage" are allowed.
folders_to_delete = [
"academy",
"contributing",
"developer-hub",
"tags",
"categories",
"applications",
"references/coverage",
]
# Deleting happens before collection so PDFs from these sections are never
# copied into the target directory.
delete_folders(folders_to_delete)
# presumably "w2pdf_output" is where website2pdf writes its PDFs - confirm
source_directory = "w2pdf_output"
# merge_pdfs() reads its inputs from "final", so this must stay in sync.
target_directory = "final"
find_and_copy_pdfs(source_directory, target_directory)
# Pages are appended in the order given by doc_list (imported from pdf_list).
merge_pdfs(doc_list, "localstack_docs.pdf")
Copy link
Member

@whummer whummer Nov 21, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: we could add a small check to discover pages that are contained in the target_directory, but not contained in doc_list (and then either print a warning, or fail the script), to make it easier to maintain the list in pdf_list.py over time. (not super critical for now, though..)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, will follow up on the other PR 👍

print("The PDF files have been merged into a single PDF file: localstack_docs.pdf")
184 changes: 184 additions & 0 deletions scripts/pdf_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
doc_list = [
'overview',
'installation',
'auth-token',
'quickstart',
'frequently-asked-questions',
'glossary',
'help-and-support',
'integrations',
'aws-command-line-interface',
'serverless-framework',
'testcontainers',
'spring-cloud-function',
'architect',
'aws-copilot-cli',
'crossplane',
'terraform',
'aws-sam',
'aws-cdk',
'pulumi',
'cdk-for-terraform',
'self-managed-kafka-cluster',
'aws-chalice',
'gitpod',
'openshift',
'former2',
'cloud-custodian',
'kubernetes',
'language-sdks',
'.net',
'c++',
'go',
'java',
'javascript',
'php',
'python-boto3',
'ruby',
'aws-service-feature-coverage',
'amazon-api-gateway',
'appconfig',
'application-auto-scaling',
'appsync',
'athena',
'aws-amplify',
'aws-certificate-manager',
'organizations',
'backup',
'batch',
'cloudformation',
'cloudfront',
'cloudtrail',
'cloudwatch',
'codecommit',
'cognito',
'config',
'cost-explorer',
'documentdb',
'dynamodb',
'elastic-beanstalk',
'elastic-compute-cloud',
'elastic-container-registry',
'elastic-container-service',
'elastic-file-system',
'elastic-kubernetes-service',
'elastic-load-balancing',
'elastic-mapreduce',
'elasticache',
'elasticsearch-service',
'elemental-mediastore',
'eventbridge',
'fault-injection-simulator',
'glacier',
'glue',
'identity-and-access-management',
'iot',
'key-management-service',
'kinesis',
'kinesis-data-analytics',
'kinesis-data-firehose',
'lambda',
'cloudwatch-logs',
'managed-streaming-for-kafka',
'managed-workflows-for-apache-airflow',
'mq',
'neptune',
'opensearch-service',
'quantum-ledger-database',
'redshift',
'relational-database-service',
'resource-groups',
'route53',
's3',
'sagemaker',
'secrets-manager',
'security-token-service',
'serverless-application-repository',
'service-discovery',
'simple-email-service',
'simple-notification-service',
'simple-queue-service',
'simple-workflow-service',
'step-functions',
'support',
'systems-manager',
'timestream',
'transcribe',
'transfer',
'x-ray',
'chaos-engineering',
'fault-injection-simulator-experiments',
'outages-extension',
'route53-failover-with-fis',
'subsequent-configurations',
'chaos-engineering-dashboard',
'continuous-integration',
'ci-analytics',
'circleci',
'drone-ci',
'github-actions',
'travis-ci',
'gitlab-ci',
'harness-ci',
'ci-keys',
'localstack-extensions',
'managing-extensions',
'developing-extensions',
'official-extensions',
'cloud-pods',
'getting-started',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems to be a duplicate of line 125 - on purpose?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like a bug. Will fix later in a follow-up PR.

'cli-command-reference',
'remotes',
'launchpad',
'community-cloud-pods',
'cloud-sandbox',
'ephemeral-instances',
'application-preview',
'security-testing',
'iam-policy-enforcement',
'explainable-iam',
'iam-policy-stream',
'localstack-testing-tools',
'cockpit',
'localsurf',
'localstack-desktop',
'localstack-docker-extension',
'lambda-tools',
'hot-reloading',
'remote-debugging',
'lambda-vscode-extension',
'dns-server',
'localstack-web-application',
'accounts',
'workspaces',
'managing-users-and-licenses',
'resource-browser',
'extensions-library',
'cloud-pods-browser',
'export-and-import-state',
'stack-insights',
'single-sign-on',
'sso-for-azure-ad',
'references',
'network-troubleshooting',
'accessing-a-resource-created-by-localstack',
'accessing-localstack-via-the-endpoint-url',
'transparent-endpoint-injection',
HarshCasper marked this conversation as resolved.
Show resolved Hide resolved
'configuration',
'arm64-support',
'credentials',
'cross-account-and-cross-region-access',
'custom-tls-certificates',
'docker-images',
'extensions-reference',
'external-service-port-range',
'filesystem-layout',
'initialization-hooks',
'internal-endpoints',
'logging',
'multi-account-setups',
'persistence',
'podman',
'usage-tracking',
'api-key'
]