From f1b0734ca12d3a40bb27c285c34a4b2f7ace7fba Mon Sep 17 00:00:00 2001
From: HarshCasper
Date: Tue, 21 Nov 2023 12:12:49 +0530
Subject: [PATCH] create pdf out of the documentation

---
 .github/workflows/docs-pdf-generate.yml |  39 +++++
 scripts/generate_pdf_output.py          | 172 ++++++++++++++++++++++
 scripts/pdf_list.py                     | 186 ++++++++++++++++++++++++
 3 files changed, 397 insertions(+)
 create mode 100644 .github/workflows/docs-pdf-generate.yml
 create mode 100644 scripts/generate_pdf_output.py
 create mode 100644 scripts/pdf_list.py

diff --git a/.github/workflows/docs-pdf-generate.yml b/.github/workflows/docs-pdf-generate.yml
new file mode 100644
index 0000000000..8b790f544d
--- /dev/null
+++ b/.github/workflows/docs-pdf-generate.yml
@@ -0,0 +1,39 @@
+name: Generate a PDF version of the docs
+
+on:
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  pdf:
+    name: Generate PDF
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v3
+        with:
+          node-version: 18
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          npm install -g website2pdf
+          pip install PyPDF2
+
+      - name: Generate PDF
+        run: python3 scripts/generate_pdf_output.py
+
+      - name: Upload the PDF
+        uses: actions/upload-artifact@v3
+        with:
+          name: localstack_docs.pdf
+          path: ./localstack_docs.pdf
diff --git a/scripts/generate_pdf_output.py b/scripts/generate_pdf_output.py
new file mode 100644
--- /dev/null
+++ b/scripts/generate_pdf_output.py
@@ -0,0 +1,172 @@
+import os
+import re
+import shutil
+import subprocess
+import sys
+import time
+
+import PyPDF2
+from pdf_list import doc_list
+
+
+def run_website2pdf():
+    """
+    Executes the 'website2pdf' command to convert websites to PDF.
+    This command uses a sitemap URL to identify the web pages to convert.
+    Prints the result of the command execution or any errors encountered.
+
+    Returns:
+    bool: True if the command exited successfully, False otherwise.
+    """
+    command = [
+        "website2pdf",
+        "--sitemap-url",
+        "https://docs.localstack.cloud/sitemap.xml",
+    ]
+    try:
+        result = subprocess.run(
+            command,
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+    except subprocess.CalledProcessError as e:
+        print("An error occurred while executing the command.")
+        print(e.stderr)
+        return False
+
+    print("Command executed successfully. Output:")
+    print(result.stdout)
+    return True
+
+
+def find_and_copy_pdfs(source_dir, target_dir):
+    """
+    Finds and copies PDF files from source_dir to target_dir.
+    Renames the files by removing certain patterns and changing to lowercase.
+
+    PDFs whose renamed file names collide overwrite each other in target_dir;
+    a warning is printed whenever that happens.
+
+    Parameters:
+    source_dir (str): The directory to search for PDF files.
+    target_dir (str): The directory where PDF files will be copied to.
+    """
+    # Create the target directory if it doesn't exist
+    os.makedirs(target_dir, exist_ok=True)
+    target_abs = os.path.abspath(target_dir)
+    copied_names = set()
+
+    for root, dirs, files in os.walk(source_dir):
+        # Skip the target directory to prevent copying files onto themselves.
+        # Absolute paths are compared so "final" and "./final" are treated alike.
+        if os.path.abspath(root) == target_abs:
+            continue
+
+        for file in files:
+            if not file.endswith(".pdf"):
+                continue
+
+            # Construct the full file path
+            file_path = os.path.join(root, file)
+
+            # Remove "| Docs" and parenthetical expressions, then process for other replacements
+            new_file_name = re.sub(r" \(.*?\)", "", file.replace(" | Docs", ""))
+            new_file_name = (
+                new_file_name.lower().replace(" ", "-").replace("&", "and")
+            )
+
+            if new_file_name in copied_names:
+                print(f"Warning: overwriting duplicate PDF name: {new_file_name}")
+            copied_names.add(new_file_name)
+
+            # Copy the file to the target directory with the new name
+            shutil.copy(file_path, os.path.join(target_dir, new_file_name))
+
+
+def merge_pdfs(file_list, output_filename, source_dir="final"):
+    """
+    Merges multiple PDF files into a single PDF.
+
+    Parameters:
+    file_list (list): A list of PDF names (without the ".pdf" extension) to merge.
+    output_filename (str): The filename for the merged PDF output.
+    source_dir (str): The directory that holds the PDFs. Defaults to "final".
+    """
+    merger = PyPDF2.PdfMerger()
+    try:
+        for pdf_file in file_list:
+            with open(os.path.join(source_dir, f"{pdf_file}.pdf"), "rb") as f:
+                merger.append(f)
+
+        with open(output_filename, "wb") as out_file:
+            merger.write(out_file)
+    finally:
+        # Release the merger's resources even if a PDF is missing or unreadable
+        merger.close()
+
+
+def read_file_list(filename):
+    """
+    Reads a list of filenames from a given text file.
+    Blank lines are skipped because they cannot name a file.
+
+    Parameters:
+    filename (str): The filename of the text file to read.
+
+    Returns:
+    list: A list of filenames read from the file.
+    """
+    with open(filename, "r", encoding="utf-8") as file:
+        return [line.strip() for line in file if line.strip()]
+
+
+def delete_folders(folder_list, base_dir="w2pdf_output"):
+    """
+    Deletes a list of folders.
+
+    Parameters:
+    folder_list (list): A list of folder names to delete.
+    base_dir (str): The directory that contains the folders. Defaults to "w2pdf_output".
+    """
+    for folder in folder_list:
+        folder_path = os.path.join(base_dir, folder)
+        try:
+            shutil.rmtree(folder_path)
+        except FileNotFoundError:
+            print(f"Folder not found: {folder_path}")
+        except OSError as e:
+            print(f"Error deleting folder {folder_path}: {e}")
+
+
+def main():
+    """
+    Generates the per-page PDFs, drops the unwanted sections and merges the rest.
+    Exits with status 1 if the 'website2pdf' command fails.
+    """
+    if not run_website2pdf():
+        # Stop here rather than merging whatever an unsuccessful run left behind
+        sys.exit(1)
+    # NOTE(review): subprocess.run already waits for the command to exit, so this
+    # pause looks redundant - confirm website2pdf leaves no background work behind
+    time.sleep(60)
+    folders_to_delete = [
+        "academy",
+        "contributing",
+        "developer-hub",
+        "tags",
+        "categories",
+        "applications",
+        "references/coverage",
+    ]
+    delete_folders(folders_to_delete)
+    source_directory = "w2pdf_output"
+    target_directory = "final"
+    find_and_copy_pdfs(source_directory, target_directory)
+    merge_pdfs(doc_list, "localstack_docs.pdf", target_directory)
+    print("The PDF files have been merged into a single PDF file: localstack_docs.pdf")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/pdf_list.py b/scripts/pdf_list.py
new file mode 100644
index 0000000000..8413eb8106
--- /dev/null
+++ b/scripts/pdf_list.py
@@ -0,0 +1,186 @@
+doc_list = [
+    'overview',
+    'installation',
+    'auth-token',
+    'quickstart',
+    'frequently-asked-questions',
+    'glossary',
+    'help-and-support',
+    'integrations',
+    'aws-command-line-interface',
+    'serverless-framework',
+    'testcontainers',
+    'spring-cloud-function',
+    'architect',
+    'aws-copilot-cli',
+    'crossplane',
+    'terraform',
+    'aws-sam',
+    'aws-cdk',
+    'pulumi',
+    'cdk-for-terraform',
+    'self-managed-kafka-cluster',
+    'aws-chalice',
+    'gitpod',
+    'openshift',
+    'former2',
+    'cloud-custodian',
+    'kubernetes',
+    'language-sdks',
+    '.net',
+    'c++',
+    'go',
+    'java',
+    'javascript',
+    'php',
+    'python-boto3',
+    'ruby',
+    'aws-service-feature-coverage',
+    'amazon-api-gateway',
+    'appconfig',
+    'application-auto-scaling',
+    'appsync',
+    'athena',
+    'aws-amplify',
+    'aws-certificate-manager',
+    'organizations',
+    'backup',
+    'batch',
+    'cloudformation',
+    'cloudfront',
+    'cloudtrail',
+    'cloudwatch',
+    'codecommit',
+    'cognito',
+    'config',
+    'cost-explorer',
+    'documentdb',
+    'dynamodb',
+    'elastic-beanstalk',
+    'elastic-compute-cloud',
+    'elastic-container-registry',
+    'elastic-container-service',
+    'elastic-file-system',
+    'elastic-kubernetes-service',
+    'elastic-load-balancing',
+    'elastic-mapreduce',
+    'elasticache',
+    'elasticsearch-service',
+    'elemental-mediastore',
+    'eventbridge',
+    'fault-injection-simulator',
+    'glacier',
+    'glue',
+    'identity-and-access-management',
+    'iot',
+    'key-management-service',
+    'kinesis',
+    'kinesis-data-analytics',
+    'kinesis-data-firehose',
+    'lambda',
+    'cloudwatch-logs',
+    'managed-streaming-for-kafka',
+    'managed-workflows-for-apache-airflow',
+    'mq',
+    'neptune',
+    'opensearch-service',
+    'quantum-ledger-database',
+    'redshift',
+    'relational-database-service',
+    'resource-groups',
+    'route53',
+    's3',
+    'sagemaker',
+    'secrets-manager',
+    'security-token-service',
+    'serverless-application-repository',
+    'service-discovery',
+    'simple-email-service',
+    'simple-notification-service',
+    'simple-queue-service',
+    'simple-workflow-service',
+    'step-functions',
+    'support',
+    'systems-manager',
+    'timestream',
+    'transcribe',
+    'transfer',
+    'x-ray',
+    'chaos-engineering',
+    'fault-injection-simulator-experiments',
+    'outages-extension',
+    'route53-failover-with-fis',
+    'subsequent-configurations',
+    'chaos-engineering-dashboard',
+    'continuous-integration',
+    'ci-analytics',
+    'circleci',
+    'drone-ci',
+    'github-actions',
+    'travis-ci',
+    'gitlab-ci',
+    'harness-ci',
+    'ci-keys',
+    'localstack-extensions',
+    'getting-started',
+    'managing-extensions',
+    'developing-extensions',
+    'official-extensions',
+    'cloud-pods',
+    'getting-started',
+    'cli-command-reference',
+    'remotes',
+    'launchpad',
+    'community-cloud-pods',
+    'cloud-sandbox',
+    'ephemeral-instances',
+    'application-preview',
+    'security-testing',
+    'iam-policy-enforcement',
+    'explainable-iam',
+    'iam-policy-stream',
+    'localstack-testing-tools',
+    'cockpit',
+    'localsurf',
+    'localstack-desktop',
+    'localstack-docker-extension',
+    'lambda-tools',
+    'hot-reloading',
+    'remote-debugging',
+    'lambda-vscode-extension',
+    'transparent-endpoint-injection',
+    'dns-server',
+    'localstack-web-application',
+    'accounts',
+    'workspaces',
+    'managing-users-and-licenses',
+    'resource-browser',
+    'extensions-library',
+    'cloud-pods-browser',
+    'export-and-import-state',
+    'stack-insights',
+    'single-sign-on',
+    'sso-for-azure-ad',
+    'references',
+    'network-troubleshooting',
+    'accessing-a-resource-created-by-localstack',
+    'accessing-localstack-via-the-endpoint-url',
+    'transparent-endpoint-injection',
+    'configuration',
+    'arm64-support',
+    'credentials',
+    'cross-account-and-cross-region-access',
+    'custom-tls-certificates',
+    'docker-images',
+    'extensions-reference',
+    'external-service-port-range',
+    'filesystem-layout',
+    'initialization-hooks',
+    'internal-endpoints',
+    'logging',
+    'multi-account-setups',
+    'persistence',
+    'podman',
+    'usage-tracking',
+    'api-key'
+]