Skip to content

Commit

Permalink
create pdf out of the documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
HarshCasper committed Nov 21, 2023
1 parent 0a27cb3 commit f1b0734
Show file tree
Hide file tree
Showing 3 changed files with 362 additions and 0 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/docs-pdf-generate.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Workflow that builds a single PDF of the documentation and publishes it
# as a build artifact.
name: Generate a PDF version of the docs

# Triggers: pull requests (filtered on the `main` branch) and manual runs
# via workflow_dispatch.
on:
pull_request:
branches:
- main
workflow_dispatch:

jobs:
pdf:
name: Generate PDF
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

# Node.js is set up because the `website2pdf` CLI is installed through npm below.
- name: Setup Node.js
uses: actions/setup-node@v3
with:
node-version: 18

# Python is set up to run scripts/generate_pdf_output.py.
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.10'

# website2pdf (npm, global) and PyPDF2 (pip) are both installed unpinned.
- name: Install dependencies
run: |
npm install -g website2pdf
pip install PyPDF2
# Runs the generation script from the repository root.
- name: Generate PDF
run: python3 scripts/generate_pdf_output.py

# Uploads ./localstack_docs.pdf as an artifact named localstack_docs.pdf.
- name: Upload the PDF
uses: actions/upload-artifact@v3
with:
name: localstack_docs.pdf
path: ./localstack_docs.pdf
137 changes: 137 additions & 0 deletions scripts/generate_pdf_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import os
import re
import shutil
import subprocess
import time

import PyPDF2
from pdf_list import doc_list


def run_website2pdf():
    """
    Run the 'website2pdf' CLI against the LocalStack docs sitemap.

    The call blocks until the command exits. When the command exits with
    status zero, its captured stdout is printed. When it exits with a
    non-zero status, a short notice and the captured stderr are printed
    instead; the error is not re-raised and nothing is returned.
    """
    sitemap_url = "https://docs.localstack.cloud/sitemap.xml"
    try:
        completed = subprocess.run(
            ["website2pdf", "--sitemap-url", sitemap_url],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,  # turn a non-zero exit status into CalledProcessError
        )
    except subprocess.CalledProcessError as error:
        print("An error occurred while executing the command.")
        print(error.stderr)
    else:
        print("Command executed successfully. Output:")
        print(completed.stdout)


def find_and_copy_pdfs(source_dir, target_dir):
    """
    Copy every PDF found under source_dir into target_dir with a normalized name.

    The new file name is derived from the original one by removing the
    literal " | Docs", removing any " (...)" parenthetical, lowercasing,
    replacing spaces with "-" and replacing "&" with "and". Files whose
    normalized names collide overwrite each other in target_dir.

    Parameters:
    source_dir (str): The directory to search (recursively) for PDF files.
    target_dir (str): The directory the PDF files are copied to. It is
        created if missing and may live inside source_dir.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(target_dir, exist_ok=True)

    # Compare absolute, normalized paths: a raw string comparison misses
    # equivalent spellings such as "out/./final" vs "out/final", which would
    # make the walk copy files in target_dir onto themselves.
    target_abs = os.path.abspath(target_dir)

    for root, dirs, files in os.walk(source_dir):
        # Prune the target directory in place so os.walk never descends into it.
        dirs[:] = [
            d for d in dirs if os.path.abspath(os.path.join(root, d)) != target_abs
        ]

        # source_dir itself may be the target; skip its own files in that case.
        if os.path.abspath(root) == target_abs:
            continue

        for file in files:
            if not file.endswith(".pdf"):
                continue

            # Remove "| Docs" and parenthetical expressions, then slugify.
            new_file_name = re.sub(r" \(.*?\)", "", file.replace(" | Docs", ""))
            new_file_name = (
                new_file_name.lower().replace(" ", "-").replace("&", "and")
            )

            shutil.copy(
                os.path.join(root, file), os.path.join(target_dir, new_file_name)
            )


def merge_pdfs(file_list, output_filename, source_dir="final"):
    """
    Merges multiple PDF files into a single PDF, in the order given.

    Each entry of file_list is a base name; the file read for it is
    "<source_dir>/<entry>.pdf". A missing input file raises
    FileNotFoundError from open().

    Parameters:
    file_list (list): Base names (without the ".pdf" extension) of the PDFs
        to merge.
    output_filename (str): The filename for the merged PDF output.
    source_dir (str): Directory holding the input PDFs. Defaults to "final",
        the directory that was previously hard-coded.
    """
    merger = PyPDF2.PdfMerger()
    try:
        for pdf_file in file_list:
            pdf_path = os.path.join(source_dir, f"{pdf_file}.pdf")
            with open(pdf_path, "rb") as f:
                merger.append(f)

        with open(output_filename, "wb") as out_file:
            merger.write(out_file)
    finally:
        # Release the merger's resources even if an input is missing or
        # unreadable.
        merger.close()


def read_file_list(filename):
    """
    Load a text file as a list of whitespace-stripped lines.

    Every line of the file yields one entry, in file order; blank lines
    yield empty strings.

    Parameters:
    filename (str): Path of the text file to read.
    Returns:
    list: One stripped string per line of the file.
    """
    entries = []
    with open(filename, "r") as handle:
        for raw_line in handle:
            entries.append(raw_line.strip())
    return entries


def delete_folders(folder_list, base_dir="w2pdf_output"):
    """
    Deletes a list of folders located under base_dir.

    Deletion is best-effort: a folder that does not exist is reported with a
    "Folder not found" message, and an OSError raised while deleting is
    reported and does not stop the remaining folders from being processed.

    Parameters:
    folder_list (list): Folder names (relative to base_dir) to delete.
    base_dir (str): Directory the folder names are resolved against.
        Defaults to "w2pdf_output", the directory that was previously
        hard-coded.
    """
    for folder in folder_list:
        folder_path = os.path.join(base_dir, folder)
        if not os.path.exists(folder_path):
            print(f"Folder not found: {folder_path}")
            continue
        try:
            shutil.rmtree(folder_path)
        except OSError as e:
            print(f"Error deleting folder {folder_path}: {e}")


if __name__ == "__main__":
    # Step 1: render the pages listed in the docs sitemap to PDF.
    run_website2pdf()
    # Fixed 60-second pause before post-processing.
    # NOTE(review): subprocess.run already blocks until the command exits;
    # confirm whether this wait is still needed.
    time.sleep(60)

    # Step 2: remove these folders from the w2pdf_output tree so their PDFs
    # are not copied.
    delete_folders(
        [
            "academy",
            "contributing",
            "developer-hub",
            "tags",
            "categories",
            "applications",
            "references/coverage",
        ]
    )

    # Step 3: flatten the remaining PDFs into "final" under normalized names.
    find_and_copy_pdfs("w2pdf_output", "final")

    # Step 4: concatenate the PDFs named in doc_list, in that order.
    merge_pdfs(doc_list, "localstack_docs.pdf")
    print("The PDF files have been merged into a single PDF file: localstack_docs.pdf")
186 changes: 186 additions & 0 deletions scripts/pdf_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# Ordered base names (no ".pdf" extension) of the PDFs to merge; imported by
# scripts/generate_pdf_output.py and passed to merge_pdfs, which appends them
# in this order.
# NOTE(review): "getting-started" and "transparent-endpoint-injection" each
# appear twice. Both occurrences resolve to the same file name, so the same
# PDF is appended twice — confirm this is intended.
doc_list = [
    "overview",
    "installation",
    "auth-token",
    "quickstart",
    "frequently-asked-questions",
    "glossary",
    "help-and-support",
    "integrations",
    "aws-command-line-interface",
    "serverless-framework",
    "testcontainers",
    "spring-cloud-function",
    "architect",
    "aws-copilot-cli",
    "crossplane",
    "terraform",
    "aws-sam",
    "aws-cdk",
    "pulumi",
    "cdk-for-terraform",
    "self-managed-kafka-cluster",
    "aws-chalice",
    "gitpod",
    "openshift",
    "former2",
    "cloud-custodian",
    "kubernetes",
    "language-sdks",
    ".net",
    "c++",
    "go",
    "java",
    "javascript",
    "php",
    "python-boto3",
    "ruby",
    "aws-service-feature-coverage",
    "amazon-api-gateway",
    "appconfig",
    "application-auto-scaling",
    "appsync",
    "athena",
    "aws-amplify",
    "aws-certificate-manager",
    "organizations",
    "backup",
    "batch",
    "cloudformation",
    "cloudfront",
    "cloudtrail",
    "cloudwatch",
    "codecommit",
    "cognito",
    "config",
    "cost-explorer",
    "documentdb",
    "dynamodb",
    "elastic-beanstalk",
    "elastic-compute-cloud",
    "elastic-container-registry",
    "elastic-container-service",
    "elastic-file-system",
    "elastic-kubernetes-service",
    "elastic-load-balancing",
    "elastic-mapreduce",
    "elasticache",
    "elasticsearch-service",
    "elemental-mediastore",
    "eventbridge",
    "fault-injection-simulator",
    "glacier",
    "glue",
    "identity-and-access-management",
    "iot",
    "key-management-service",
    "kinesis",
    "kinesis-data-analytics",
    "kinesis-data-firehose",
    "lambda",
    "cloudwatch-logs",
    "managed-streaming-for-kafka",
    "managed-workflows-for-apache-airflow",
    "mq",
    "neptune",
    "opensearch-service",
    "quantum-ledger-database",
    "redshift",
    "relational-database-service",
    "resource-groups",
    "route53",
    "s3",
    "sagemaker",
    "secrets-manager",
    "security-token-service",
    "serverless-application-repository",
    "service-discovery",
    "simple-email-service",
    "simple-notification-service",
    "simple-queue-service",
    "simple-workflow-service",
    "step-functions",
    "support",
    "systems-manager",
    "timestream",
    "transcribe",
    "transfer",
    "x-ray",
    "chaos-engineering",
    "fault-injection-simulator-experiments",
    "outages-extension",
    "route53-failover-with-fis",
    "subsequent-configurations",
    "chaos-engineering-dashboard",
    "continuous-integration",
    "ci-analytics",
    "circleci",
    "drone-ci",
    "github-actions",
    "travis-ci",
    "gitlab-ci",
    "harness-ci",
    "ci-keys",
    "localstack-extensions",
    "getting-started",
    "managing-extensions",
    "developing-extensions",
    "official-extensions",
    "cloud-pods",
    "getting-started",
    "cli-command-reference",
    "remotes",
    "launchpad",
    "community-cloud-pods",
    "cloud-sandbox",
    "ephemeral-instances",
    "application-preview",
    "security-testing",
    "iam-policy-enforcement",
    "explainable-iam",
    "iam-policy-stream",
    "localstack-testing-tools",
    "cockpit",
    "localsurf",
    "localstack-desktop",
    "localstack-docker-extension",
    "lambda-tools",
    "hot-reloading",
    "remote-debugging",
    "lambda-vscode-extension",
    "transparent-endpoint-injection",
    "dns-server",
    "localstack-web-application",
    "accounts",
    "workspaces",
    "managing-users-and-licenses",
    "resource-browser",
    "extensions-library",
    "cloud-pods-browser",
    "export-and-import-state",
    "stack-insights",
    "single-sign-on",
    "sso-for-azure-ad",
    "references",
    "network-troubleshooting",
    "accessing-a-resource-created-by-localstack",
    "accessing-localstack-via-the-endpoint-url",
    "transparent-endpoint-injection",
    "configuration",
    "arm64-support",
    "credentials",
    "cross-account-and-cross-region-access",
    "custom-tls-certificates",
    "docker-images",
    "extensions-reference",
    "external-service-port-range",
    "filesystem-layout",
    "initialization-hooks",
    "internal-endpoints",
    "logging",
    "multi-account-setups",
    "persistence",
    "podman",
    "usage-tracking",
    "api-key",
]

0 comments on commit f1b0734

Please sign in to comment.