Add last verified script #3154

Open. Wants to merge 12 commits into base: main.
4 changes: 4 additions & 0 deletions .github/workflows/build-tutorials.yml
@@ -44,6 +44,8 @@ jobs:
      - name: Checkout Tutorials
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Setup Linux
        uses: pytorch/pytorch/.github/actions/setup-linux@main
@@ -115,6 +117,8 @@ jobs:
      - name: Checkout Tutorials
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Setup Linux
        uses: pytorch/pytorch/.github/actions/setup-linux@main
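The fetch-depth: 0 setting tells actions/checkout to clone the full git history instead of the default shallow checkout. This appears to be needed because the new insert_last_verified.py script (below) derives "Created On" and "Last Updated" dates from git log, which cannot see a file's first commit on a shallow clone. A minimal sketch of the kind of lookup that depends on it:

import subprocess

def first_commit_date(file_path):
    # With full history (fetch-depth: 0) this returns the author date of the
    # commit that added the file; on a shallow clone it may come back empty.
    out = subprocess.run(
        ["git", "log", "--diff-filter=A", "--format=%aD", "--", file_path],
        capture_output=True, text=True, check=True,
    ).stdout
    return out.splitlines()[0] if out else "Unknown"

print(first_commit_date("beginner_source/deep_learning_nlp_tutorial.rst"))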
11 changes: 11 additions & 0 deletions Makefile
@@ -86,18 +86,29 @@ download:
	wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip -P $(DATADIR)
	unzip -o $(DATADIR)/PennFudanPed.zip -d intermediate_source/data/

download-last-reviewed-json:
	@echo "Downloading tutorials-review-data.json..."
	curl -o tutorials-review-data.json https://raw.githubusercontent.com/pytorch/tutorials/refs/heads/last-reviewed-data-json/tutorials-review-data.json
	@echo "Finished downloading tutorials-review-data.json."

docs:
	make download
	make download-last-reviewed-json
	make html
	@python insert_last_verified.py $(BUILDDIR)/html
	rm -rf docs
	cp -r $(BUILDDIR)/html docs
	touch docs/.nojekyll
	rm -rf tutorials-review-data.json

html-noplot:
	$(SPHINXBUILD) -D plot_gallery=0 -b html $(SPHINXOPTS) "$(SOURCEDIR)" "$(BUILDDIR)/html"
	# bash .jenkins/remove_invisible_code_block_batch.sh "$(BUILDDIR)/html"
	@echo
	make download-last-reviewed-json
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
	@echo "Running post-processing script to insert 'Last Verified' dates..."
	@python insert_last_verified.py $(BUILDDIR)/html
	rm -rf tutorials-review-data.json

clean-cache:
	make clean
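For reference, insert_last_verified.py (added below) only relies on the "Path", "Last Verified", and "Status" fields of each entry in tutorials-review-data.json. A minimal sketch of what an entry is assumed to look like; the path and date here are made-up illustrative values, not real audit data:

# Illustrative only: field names match what insert_last_verified.py reads.
example_entries = [
    {
        "Path": "beginner/basics/intro",  # hypothetical build path, without the .html extension
        "Last Verified": "2024-11-05",    # parsed with datetime.strptime(..., "%Y-%m-%d")
        "Status": "",                     # may also be "needs update", "not verified", or "deprecated"
    },
]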
12 changes: 12 additions & 0 deletions beginner_source/deep_learning_nlp_tutorial.rst
@@ -0,0 +1,12 @@
:orphan:

Deep Learning for NLP with Pytorch
===================================

This tutorial has been deprecated.

Redirecting to a newer tutorial in 3 seconds...

.. raw:: html

   <meta http-equiv="Refresh" content="3; url='https://pytorch.org/tutorials/index.html'" />
@@ -1,3 +1,5 @@
:orphan:

T5-Base Model for Summarization, Sentiment Classification, and Translation
==========================================================================

180 changes: 180 additions & 0 deletions insert_last_verified.py
@@ -0,0 +1,180 @@
import json
import os
import subprocess
import sys
from datetime import datetime

from bs4 import BeautifulSoup

# Check if the build directory is provided as an argument in the Makefile
if len(sys.argv) < 2:
    print("Error: Build directory not provided. Exiting.")
    sys.exit(1)

build_dir = sys.argv[1]
print(f"Build directory: {build_dir}")

json_file_path = "tutorials-review-data.json"
# build_dir = "_build/html"  # uncomment to test locally after _build/html is created

# paths to skip from the post-processing script
paths_to_skip = [
    "beginner/examples_autograd/two_layer_net_custom_function",  # not present in the repo
    "beginner/examples_nn/two_layer_net_module",  # not present in the repo
    "beginner/examples_tensor/two_layer_net_numpy",  # not present in the repo
    "beginner/examples_tensor/two_layer_net_tensor",  # not present in the repo
    "beginner/examples_autograd/two_layer_net_autograd",  # not present in the repo
    "beginner/examples_nn/two_layer_net_optim",  # not present in the repo
    "beginner/examples_nn/two_layer_net_nn",  # not present in the repo
    "intermediate/coding_ddpg",  # not present in the repo - will delete the carryover
]
# Mapping of build directories to source directories
source_to_build_mapping = {
    "beginner": "beginner_source",
    "recipes": "recipes_source",
    "distributed": "distributed",
    "intermediate": "intermediate_source",
    "prototype": "prototype_source",
    "advanced": "advanced_source",
    "": "",  # root dir for index.rst
}


# Use git log to get the creation date of the file
def get_creation_date(file_path):
    try:
        result = subprocess.run(
            ["git", "log", "--diff-filter=A", "--format=%aD", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
        )
        if result.stdout:
            creation_date = result.stdout.splitlines()[0]
            creation_date = datetime.strptime(creation_date, "%a, %d %b %Y %H:%M:%S %z")
            formatted_date = creation_date.strftime("%b %d, %Y")
        else:
            formatted_date = "Unknown"
        return formatted_date
    except subprocess.CalledProcessError:
        return "Unknown"


# Use git log to get the last updated date of the file
def get_last_updated_date(file_path):
    try:
        result = subprocess.run(
            ["git", "log", "-1", "--format=%aD", "--", file_path],
            capture_output=True,
            text=True,
            check=True,
        )
        if result.stdout:
            last_updated_date = result.stdout.strip()
            last_updated_date = datetime.strptime(
                last_updated_date, "%a, %d %b %Y %H:%M:%S %z"
            )
            formatted_date = last_updated_date.strftime("%b %d, %Y")
        else:
            formatted_date = "Unknown"
        return formatted_date
    except subprocess.CalledProcessError:
        return "Unknown"


# Try to find the source file with the given base path and the extensions .rst and .py
def find_source_file(base_path):
    for ext in [".rst", ".py"]:
        source_file_path = base_path + ext
        if os.path.exists(source_file_path):
            return source_file_path
    return None


# Function to process a JSON file and insert the "Last Verified" information into the HTML files
def process_json_file(json_file_path):
    with open(json_file_path, "r", encoding="utf-8") as json_file:
        json_data = json.load(json_file)

    for entry in json_data:
        path = entry["Path"]
        last_verified = entry["Last Verified"]
        status = entry.get("Status", "")
        if path in paths_to_skip:
            print(f"Skipping path: {path}")
            continue
        if status in ["needs update", "not verified"]:
            formatted_last_verified = "Not Verified"
        elif last_verified:
            try:
                last_verified_date = datetime.strptime(last_verified, "%Y-%m-%d")
                formatted_last_verified = last_verified_date.strftime("%b %d, %Y")
            except ValueError:
                formatted_last_verified = "Unknown"
        else:
            formatted_last_verified = "Not Verified"
        if status == "deprecated":
            formatted_last_verified += " Deprecated"

        for build_subdir, source_subdir in source_to_build_mapping.items():
            if path.startswith(build_subdir):
                html_file_path = os.path.join(build_dir, path + ".html")
                base_source_path = os.path.join(
                    source_subdir, path[len(build_subdir) + 1 :]
                )
                source_file_path = find_source_file(base_source_path)
                break
        else:
            print(f"Warning: No mapping found for path {path}")
            continue

        if not os.path.exists(html_file_path):
            print(
                f"Warning: HTML file not found for path {html_file_path}. "
                "If this is a new tutorial, please add it to the audit JSON file and set the Verified status and today's date."
            )
            continue

        if not source_file_path:
            print(f"Warning: Source file not found for path {base_source_path}.")
            continue

        created_on = get_creation_date(source_file_path)
        last_updated = get_last_updated_date(source_file_path)

        with open(html_file_path, "r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, "html.parser")

        # Check if the <p> tag with class "date-info-last-verified" already exists
        existing_date_info = soup.find("p", {"class": "date-info-last-verified"})
        if existing_date_info:
            print(
                f"Warning: <p> tag with class 'date-info-last-verified' already exists in {html_file_path}"
            )
            continue

        h1_tag = soup.find("h1")  # Find the h1 tag to insert the dates
        if h1_tag:
            date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
            date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
            # Add the "Created On", "Last Updated", and "Last Verified" information
            date_info_tag.string = (
                f"Created On: {created_on} | "
                f"Last Updated: {last_updated} | "
                f"Last Verified: {formatted_last_verified}"
            )
            # Insert the new tag after the <h1> tag
            h1_tag.insert_after(date_info_tag)
            # Save back to the HTML.
            with open(html_file_path, "w", encoding="utf-8") as file:
                file.write(str(soup))
        else:
            print(f"Warning: <h1> tag not found in {html_file_path}")


process_json_file(json_file_path)
print(
    "Finished processing JSON file. Please check the output for any warnings. "
    "Pages like `nlp/index.html` are generated only during the full `make docs` "
    "or `make html` build. Warnings about these files when you run `make html-noplot` "
    "can be ignored."
)
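To see what the post-processing does to a page, here is a small self-contained sketch that applies the same BeautifulSoup insertion to an in-memory HTML snippet; the markup and dates are placeholder stand-ins for a real built tutorial page:

from bs4 import BeautifulSoup

html = "<html><body><h1>Some Tutorial</h1><p>Intro text.</p></body></html>"
soup = BeautifulSoup(html, "html.parser")

date_info_tag = soup.new_tag("p", **{"class": "date-info-last-verified"})
date_info_tag["style"] = "color: #6c6c6d; font-size: small;"
date_info_tag.string = (
    "Created On: Jan 01, 2024 | Last Updated: Feb 01, 2024 | Last Verified: Not Verified"
)
soup.find("h1").insert_after(date_info_tag)

print(soup)
# The new <p class="date-info-last-verified"> now sits directly after the <h1>.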
2 changes: 2 additions & 0 deletions intermediate_source/pipeline_tutorial.rst
@@ -1,3 +1,5 @@
:orphan:

Training Transformer models using Pipeline Parallelism
======================================================
