Skip to content

Commit

Permalink
DEV-1414: compute catalog file for given hathifile name
Browse files Browse the repository at this point in the history
  • Loading branch information
aelkiss committed Dec 5, 2024
1 parent bf51d01 commit bff9a91
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 8 deletions.
32 changes: 25 additions & 7 deletions lib/verifier/hathifiles_verifier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -133,25 +133,43 @@ def run_for_date(date:)
# Verify:
# readable
# TODO: line count must be greater than that of the corresponding catalog file
# Checks the daily update hathifile for +date+ and, when +date+ is the first
# of the month, the full hathifile as well. Each file goes through
# verify_file and verify_hathifile_contents; the line count returned by the
# contents check is then passed to verify_hathifile_linecount together with
# the catalog file that catalog_file_for computes for the same date.
#
# date: the date whose hathifiles are verified (defaults to current_date).
def verify_hathifile(date: current_date)
  update_file = self.class.dated_derivative(location: :HATHIFILE_ARCHIVE, name: "hathi_upd_YYYYMMDD.txt.gz", date: date)
  verify_file(path: update_file)
  # Run the contents check once and keep its line count for the comparison.
  update_linecount = verify_hathifile_contents(path: update_file)
  verify_hathifile_linecount(update_linecount, catalog_path: catalog_file_for(date))

  # Full hathifiles are only checked on the first of the month.
  if date.first_of_month?
    full_file = self.class.dated_derivative(location: :HATHIFILE_ARCHIVE, name: "hathi_full_YYYYMMDD.txt.gz", date: date)
    verify_file(path: full_file)
    full_linecount = verify_hathifile_contents(path: full_file)
    verify_hathifile_linecount(full_linecount, catalog_path: catalog_file_for(date, full: true))
  end
end

# Runs a HathifileContentsVerifier over the file at +path+, copies the errors
# it collected into @errors, and returns the verifier's line count.
def verify_hathifile_contents(path:)
  verifier = HathifileContentsVerifier.new(path)
  verifier.run
  # Record each error exactly once. Array#concat receives the whole list as a
  # single argument, so a very long error list is not expanded into one
  # argument per error (as a splat would do) and @errors stays flat.
  # NOTE(review): assumes verifier.errors is an Array -- confirm.
  # TODO: copying the errors here is unnecessary except for testing; it would
  # be better to test HathifileContentsVerifier directly.
  @errors.concat(verifier.errors)
  verifier.line_count
end

# Counts the lines in the gzipped catalog file at +catalog_path+ and returns
# that count.
#
# linecount: line count of the hathifile being verified.
# TODO: linecount is accepted but not yet compared with the catalog count.
def verify_hathifile_linecount(linecount, catalog_path:)
  # The block form closes the gzip reader as soon as the lines are counted,
  # instead of leaving it open until garbage collection.
  Zlib::GzipReader.open(catalog_path) { |gz| gz.count }
end

# Builds the catalog archive path that corresponds to a hathifile date.
#
# date: the hathifile date; the catalog file looked up is for date - 1.
# full: when truthy, selects the "full" catalog file instead of the "upd" one.
#
# Returns whatever the class-level dated_derivative returns for the
# CATALOG_ARCHIVE location.
def catalog_file_for(date, full: false)
  kind =
    if full
      "full"
    else
      "upd"
    end
  catalog_date = date - 1

  self.class.dated_derivative(location: :CATALOG_ARCHIVE, name: "zephir_#{kind}_YYYYMMDD.json.gz", date: catalog_date)
end

# Returns the error list provided by the inherited #errors with any nested
# arrays expanded into a single flat array.
def errors
  collected = super
  collected.flatten
end

end
end
10 changes: 9 additions & 1 deletion spec/unit/hathifiles_verifier_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,15 @@ module PostZephirProcessing
end

describe "#catalog_file_for" do
  # The expected catalog file name carries the date one day before the
  # date that is passed in.
  it "computes a source catalog file based on date - 1" do
    expect(described_class.new.catalog_file_for(Date.parse("2023-01-04")))
      .to eq("#{ENV['CATALOG_ARCHIVE']}/zephir_upd_20230103.json.gz")
  end

  # Same date arithmetic across a month boundary, for the full catalog file.
  it "computes a full source catalog file based on date - 1" do
    expect(described_class.new.catalog_file_for(Date.parse("2024-12-01"), full: true))
      .to eq("#{ENV['CATALOG_ARCHIVE']}/zephir_full_20241130.json.gz")
  end
end
end

Expand Down

0 comments on commit bff9a91

Please sign in to comment.