diff --git a/.circleci/config.yml b/.circleci/config.yml index 193ec51a9..066701cb4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -42,14 +42,21 @@ jobs: - node_modules key: v2-dependencies-npm-{{ checksum "package-lock.json" }} - run: npm run uswds-build - - run: bundle exec jekyll build + # We export the file list in pa11y_targets so that we can make pa11y-ci scan only those changed files in the "Run pa11yci" step + - run: + name: Jekyll build + command: | + bundle exec jekyll build + echo "export PA11Y_TARGETS=$(cat pa11y_targets | base64 --wrap=0)" >> "$BASH_ENV" + source "$BASH_ENV" - run: npm run htmlproofer - run: name: Run rspec command: bundle exec rspec `pwd`/spec/ + # Invoke pa11y-ci and pass in our list of changed files - run: name: Run pa11yci - command: ./serve-pa11yci && npm run pa11y-ci + command: npm run pa11y-ci -- $(echo $PA11Y_TARGETS | base64 --decode) workflows: version: 2 commit: diff --git a/_plugins/pa11y.rb b/_plugins/pa11y.rb new file mode 100644 index 000000000..50022fcca --- /dev/null +++ b/_plugins/pa11y.rb @@ -0,0 +1,191 @@ +=begin + +Determines which files to check with pa11y-ci per pull request. + +The current scope of pa11y testing is as follows: + +- Check any posts or pages whose content has changed. +- Check any posts or pages using changed layouts. +- If stylesheets have changed, check a sample of all page types: + - Three posts of each layout in _config.yml's `defaults` + - The first, last, and middle blog archive pages (/blog/page/:num) + +Previously, checking every page resulted in excessive CI runtimes. + +=end + +# Wraps Jekyll documents with convenience methods. +# +# @param path [String] The relative path of the source file, usually a +# Markdown file (.md) +# @param layout [String] The name of the Jekyll document's layout +# @param differ [Object, #changed_files] The collaborator which lists +# the files that were changed in the last commit. +# @todo Are there cases when the document being checked has no layout? +Document = Struct.new(:path, :layout, :destination, :differ) do + # @return [Boolean] Whether to scan this document in pa11y-ci + def to_scan? + source_changed? || layout_changed? + end + + def source_changed? + changed_files.include?(path) + end + + def layout_changed? + changed_files.include?("_layouts/#{layout}.html") + end + + # Memoize these without having to memoize in the differ + def changed_files + @changed_files ||= differ.changed_files + end +end + +class Differ + # @return [Array] List of relative paths of files + def self.changed_files + @changed_files ||= %x[ #{list_files_command} ].to_s.split("\n").map(&:strip) + end + + # @return [String] Text of a shell command to get listed files + def self.list_files_command + "git ls-files --modified" + end +end + +# Gets the list of files changed in the last commit +class CommitDiffer < Differ + def self.list_files_command + "git diff-tree --no-commit-id --name-only -r $(git rev-parse HEAD)" + end +end + + +class FileChecker + + # Jekyll::Document#destination requires a root path parameter + # This constant replaces a mysterious empty string when + # calling #destination. + NO_ROOT_PATH = "".freeze + + attr_reader :file, :pa11y_target_file, :differ + + def initialize(file, pa11y_target_file, differ) + @file = file + @pa11y_target_file = pa11y_target_file + @differ = differ + end + + def check_file_for_changes + document = Document.new( + file.relative_path, + file.data["layout"], + file.destination(NO_ROOT_PATH), + differ + ) + + if document.to_scan? + File.open(pa11y_target_file, 'a') { |f| + f.write(document.destination + "\n") + } + end + end +end + +PA11Y_TARGET_FILE = ENV.fetch('PA11Y_TARGET_FILE') { 'pa11y_targets' } +DIFFER = ENV.fetch("CI", false) ? CommitDiffer : Differ + +# Outputs posts and pages to scan to a file. +Jekyll::Hooks.register :documents, :post_render do |doc| + FileChecker.new(doc, PA11Y_TARGET_FILE, DIFFER).check_file_for_changes +end + +# The :documents hook above doesn't seem to touch the pages/ directory +Jekyll::Hooks.register :pages, :post_render do |doc| + FileChecker.new(doc, PA11Y_TARGET_FILE, DIFFER).check_file_for_changes +end + +# Produces a sample set of pages needed for a site-wide pa11y test. +class SiteSampler + + attr_reader :config, :site_files, :permalinks + + def initialize(config, site_files=nil, permalinks=nil) + @config = config + @site_files = site_files || default_site_files + @permalinks = permalinks || default_permalinks + end + + def pages + folders.flat_map { |folder| sample folder } + end + + private + + # Samples 3 files from a given folder, and its index + # @todo The matcher should change, not the list of files being returned + def sample(folder) + if ["_site/", "_site/blog/"].include?(folder) + [ File.join(folder, "index.html") ] + else + folder_regex = Regexp.new("^" + folder) + index_regex = Regexp.new("^" + File.join(folder, "index.html") + "$") + site_files.select do |file| + file.match?(folder_regex) || file.match?(index_regex) + end.reject {|filename| filename.match?(/.pdf$/i)}.sample(3) + end + end + + def folders + permalinks.map do |permalink| + File.join("_site", parameterless_path(permalink), "/") + end.uniq.sort + end + + # Default list of all the files in the site + def default_site_files + Dir.glob("_site/**/*").reject { |f| File.directory?(f) } + end + + # Default list of all the permalink URL templates as given + # in the config document. These form the basis for the sampled + # pages later on. + def default_permalinks + [ + config["jekyll-archives"]["permalinks"].values, + config["collections"].map { |_name, data| data["permalink"] }, + config["paginate_path"], + "blog/" + ].flatten.uniq + end + + # Follows the file paths until reaching a :param + def parameterless_path(path) + Pathname.new(path).each_filename.take_while { |x| !x.match? "^:" }.join("/") + end +end + + +# Changes to files in these directories trigger a global check +# @todo Is there a way to get these more dynamically? +SITEWIDE_FOLDERS = ["assets", "_includes", "_sass"] + +# If files have changed in any of the folders that trigger a site-wide scan, +# sample folders across the site and add them to the list of files to check +# with pa11y for a given Pull Request. +# @todo Is there a way to unnest this code? Jekyll hooks don't allow +# early returns. +Jekyll::Hooks.register :site, :post_render do |site| + # This rubular link shows an example of the generated regular expression + # and how it works against an imagined set of changed files + # https://rubular.com/r/nfFL9P69KHSBoY + global_files = Regexp.new SITEWIDE_FOLDERS.map { |x| "^#{x}" }.join("|") + if DIFFER.changed_files.grep(global_files).any? + File.open(PA11Y_TARGET_FILE, 'a') do |f| + SiteSampler.new(site.config).pages.each do |path| + f.write(path + "\n") + end + end + end +end diff --git a/package.json b/package.json index 3f2fb9f1e..954602c39 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,8 @@ "federalist": "npm run uswds-build && mkdir ./.bundle && echo '---\nBUNDLE_GEMFILE: \"GemfileFederalist\"' > ./.bundle/config", "htmlproofer": "bundle exec htmlproofer _site --disable-external --allow-hash-href --ignore-empty-alt --ignore_urls \"18f@gsa.gov\" --no-enforce-https", "htmlproofer-local": "bundle exec htmlproofer _site --disable-external --allow-hash-href --ignore-empty-alt --ignore_urls \"localhost:4000\" --no-enforce-https --swap-urls \"^/site/:/\"", - "pa11y-ci": "pa11y-ci --sitemap http://localhost:4000/site/sitemap.xml --sitemap-exclude '.pdf|/google\\d'", + "pa11y-ci": "pa11y-ci", + "pa11y-sitemap": "pa11y-ci --sitemap http://localhost:4000/site/sitemap.xml --sitemap-exclude '.pdf|/google\\d'", "pa11y-limited": "pa11y-ci --sitemap http://localhost:4000/site/sitemap.xml --sitemap-exclude '.pdf|(/author/)|(/\\d+/)|/tags/|/google\\d'", "pa11y-local": "./killport 4000 && ./serve-pa11yci && npm run pa11y-ci", "restart": "rm -rf _site && ./serve", diff --git a/spec/_plugins/pa11y_spec.rb b/spec/_plugins/pa11y_spec.rb new file mode 100644 index 000000000..aa6b39efd --- /dev/null +++ b/spec/_plugins/pa11y_spec.rb @@ -0,0 +1,101 @@ +require_relative '../../_plugins/pa11y' + +RSpec.describe Document do + + let(:blog_path) { "_posts/2024-04-10-working-with-oracle-databases.md" } + let(:blog_layout) { "post" } + let(:blog_layout_path) { "_layouts/post.html" } + let(:destination_path) { "_site/2024/04/10/working-with-oracle-databases/index.html" } + + # Stand-in for file diffing (GitDiffer), so tests don't rely on + # git history. + class TestDiffer + attr_reader :changed_files + + def initialize(*paths) + @changed_files = *paths + end + end + + # @example Use the convenience method + # TestDiffer("_layouts/default.html", "_posts/2024-04-10-new-post.md") + def TestDiffer(*paths) + TestDiffer.new(*paths) + end + + describe "#to_scan?" do + context "source files" do + let(:document) { + Document.new(blog_path, blog_layout, destination_path, test_differ) + } + + context "with a matching changed source file" do + let(:test_differ) { TestDiffer(blog_path) } + it "returns true" do + expect(document.to_scan?).to be true + end + end + + context "without a matching changed source file" do + let(:test_differ) { TestDiffer() } + it "returns false" do + expect(document.to_scan?).to be false + end + end + + context "with a matching changed layout file" do + let(:test_differ) { TestDiffer(blog_layout_path) } + it "returns true" do + expect(document.to_scan?).to be true + end + end + + context "without a matching changed layout file" do + let(:test_differ) { TestDiffer() } + it "returns false" do + expect(document.to_scan?).to be false + end + end + + context "with a matching changed source file and changed layout file" do + let(:test_differ) { TestDiffer(blog_path, blog_layout_path) } + it "returns true" do + expect(document.to_scan?).to be true + end + end + end + end + +end + + +RSpec.describe SiteSampler do + + describe "#folders" do + context "with no site files or permalinks" do + let(:collector) { SiteSampler.new({}, [], []) } + it "lists no pages" do + expect(collector.pages.count).to eq(0) + end + end + + context "with site files and permalinks" do + let(:site_files) { ["_site/blog/index.html", "_site/blog/page/2/index.html"] } + let(:permalinks) { ["blog/page/:num/", "blog/"] } + let(:collector) { SiteSampler.new({}, site_files, permalinks) } + it "lists pages" do + expect(collector.pages.count).to eq(2) + end + end + + context "with HTML and PDF files" do + let(:site_files) { ["_site/blog/index.html", "_site/blog/page/2/index.html", "_site/partnership-principles/18FPartnershipPrinciples.pdf"] } + let(:permalinks) { ["blog/page/:num/", "blog/"] } + let(:collector) { SiteSampler.new({}, site_files, permalinks) } + it "ignores PDF files" do + expect(collector.pages.count).to eq(2) + end + end + + end +end