From d8ead752a80805ee15008955d545c2b1638629c7 Mon Sep 17 00:00:00 2001 From: Markus Demleitner Date: Fri, 15 Nov 2024 19:49:36 +0100 Subject: [PATCH 1/3] Adding support for generating a MANIFEST. This is for the new docrepo. We are also already setting a test upload URI so we don't confuse the actual docrepo with any test uploads. This also adds a fakeupload make target that ought to come in handy when writing regression tests. --- MANIFEST.in | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..76fae8c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +# This file gives (suggestions for) the links from the landing page in +# the format ;;. +# sumission.py will fill in the keys html-doc and pdf-doc, and you +# should have a very good reason to change that. You can add additional +# entries, for instance for schema files and the like. +# at this point can be document and schema. +# For now, no values may include semicolons; if you think you have to +# include curly braces, escape them from python str.format. +html;document;{html-doc} +pdf;document;{pdf-doc} From cbcb500cbe907a752650108ee4b6f971cac627ef Mon Sep 17 00:00:00 2001 From: Markus Demleitner Date: Mon, 18 Nov 2024 20:34:42 +0100 Subject: [PATCH 2/3] First shot at updating the submission script for the new docrepo --- Makefile | 14 ++++- submission.py | 171 ++++++++++++++++++++++++++------------------------ 2 files changed, 102 insertions(+), 83 deletions(-) diff --git a/Makefile b/Makefile index c884e55..9cc4adf 100644 --- a/Makefile +++ b/Makefile @@ -194,13 +194,22 @@ generate: bib-suggestions: $(DOCNAME).pdf $(PYTHON) ivoatex/suggest-bibupgrade.py $(DOCNAME).aux +MANIFEST.in: ivoatex/MANIFEST.in + if [ ! -f MANIFEST.in ]; then cp $^ $@; fi + +MANIFEST: MANIFEST.in Makefile + sed -e "s/{html-doc}/$(versionedName).tex/;\ + s/{pdf-doc}/$(versionedName).pdf/;\ + s/^#.*/# Do not edit this file. Edit MANIFEST.in instead./" $< > $@ + package: $(DOCNAME).tex $(DOCNAME).html $(DOCNAME).pdf \ - $(GENERATED_PNGS) $(FIGURES) $(AUX_FILES) + $(GENERATED_PNGS) $(FIGURES) $(AUX_FILES) MANIFEST rm -rf -- $(versionedName) mkdir $(versionedName) cp $(DOCNAME).tex $(versionedName)/$(versionedName).tex cp $(DOCNAME).html $(versionedName)/$(versionedName).html cp $(DOCNAME).pdf $(versionedName)/$(versionedName).pdf + cp MANIFEST $(versionedName)/MANIFEST ifneq ($(strip $(FIGURES)),) cp $(FIGURES) $(versionedName) @@ -220,6 +229,9 @@ endif upload: package $(PYTHON) ivoatex/submission.py $(versionedName).zip +fakeupload: package + $(PYTHON) ivoatex/submission.py --dry-run $(versionedName).zip + # Build TtH from source. See http://hutchinson.belmont.ma.us/tth/. # TtH source seems to be highly portable, so compilation should be easy diff --git a/submission.py b/submission.py index 3589a94..cedb389 100644 --- a/submission.py +++ b/submission.py @@ -10,20 +10,23 @@ Fields needed: -* doctitle -* conciseName +* title +* concise_name * email * filename -* author -* editor +* authors +* editors * abstract * comment - -* group (one of app, dal, dm, gws, reg, dcp, std, semantics, the, voe, vot, - voq) -* docver1, docver2 -* year, month, day -* doctype (one of note, wd, pr, rec, other) +* group_name (this is controlled vocabulary that authors must follow; + we should probably validate that here) +* version_major, version_minor +* date +* status (one of Note, WD, PR, REC, EN, PEN, Other) + +This is a bit lame in that we (largely) work on the repo rather than the +zipfile to work out these values. If this ever becomes a problem, see +what we are doing to validate the MANIFEST. """ import pprint @@ -32,6 +35,7 @@ import subprocess import sys import tempfile +import zipfile from xml.etree import ElementTree as etree try: @@ -42,7 +46,7 @@ "*** Please install it (Debian: python3-requests; pypi: requests).\n") -DOCREPO_URL = 'https://www.ivoa.net/cgi-bin/up.cgi' +DOCREPO_URL = 'http://testingdocrepo.ivoa.info/new_doc' class ReportableError(Exception): @@ -69,17 +73,17 @@ class DocumentMeta(object): For now, we just use attributes named like the fields in the IVOA docrepo API. """ - _attrs = ["doctitle", "conciseName", "email", - "author", "editor", "abstract", - "comment", "group", "docver1", "docver2", - "year", "month", "day", "doctype"] + _attrs = ["title", "concise_name", "email", + "authors", "editors", "abstract", + "comment", "group_name", "version_major", "version_minor", + "date", "status"] def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) self._authors = [] self._editors = [] - self.group = None + self.group_name = None self.comment = "" def get_post_payload(self): @@ -93,15 +97,10 @@ def get_post_payload(self): payload[name] = getattr(self, name) return payload - def get_date(self): - """returns the document date in ISO format. - """ - return "%s-%s-%s"%(self.year, self.month, self.day) - def add_info_from_document(self): """tries to obtain missing metadata from the formatted (XHTML) source. """ - with open(self.conciseName+".html", "rb") as f: + with open(self.concise_name+".html", "rb") as f: tree = etree.parse(f) # The following would be a bit smoother if we had xpath; there's @@ -111,7 +110,7 @@ def add_info_from_document(self): # first h1 is the document title for el in tree.iter(H("h1")): - self.doctitle = to_text(el) + self.title = to_text(el) break # pull things with ids or unique classes @@ -123,18 +122,18 @@ def add_info_from_document(self): el[0].text = "" self.abstract = to_text(el) elif el.get("id")=="ivoagroup": - self.group = self._get_wg_code(to_text(el)) + self.group_name = to_text(el) elif el.get("class")=="author": self._authors.append(to_text(el)) elif el.get("class")=="editor": self._editors.append(to_text(el)) @property - def author(self): + def authors(self): return ", ".join(self._authors) @property - def editor(self): + def editors(self): return ", ".join(self._editors) @classmethod @@ -151,11 +150,11 @@ def from_makefile(cls): kwargs = {} for input_key, parser_function in [ - ("DOCNAME", lambda v: [("conciseName", v.strip())]), + ("DOCNAME", lambda v: [("concise_name", v.strip())]), ("DOCVERSION", cls._parse_DOCVERSION), ("DOCDATE", cls._parse_DOCDATE), ("AUTHOR_EMAIL", cls._parse_AUTHOR_EMAIL), - ("DOCTYPE", lambda v: [("doctype", v.strip().lower())])]: + ("DOCTYPE", lambda v: [("status", v.strip())])]: if input_key not in meta_keys: raise ReportableError("%s not defined/garbled in Makefile" " but required for upload."%input_key) @@ -165,46 +164,10 @@ def from_makefile(cls): res = cls(**kwargs) if "IVOA_GROUP" in meta_keys: - res.group = res._get_wg_code(meta_keys["IVOA_GROUP"]) + res.group_name = res._get_wg_code(meta_keys["IVOA_GROUP"]) return res - _wg_mapping = { - "Applications": "app", - "DAL": "dal", - "Data Access Layer": "dal", - "Data Models": "dm", - "Grid and Web Services": "gws", - "Registry": "reg", - "Data Curation and Preservation": "dcp", - "Documents and Standards": "std", - "Standards and Processes": "std", - "Semantics": "semantics", - "Theory": "the", - "Technical Coordination Group": "tcg", - "VO Event": "voe", - "Time Domain": "voe", - "Education": "edu", - "Operations": "ops", - "Radio": "rad", - "No Group": "none", - } - - def _get_wg_code(self, wg_string): - """returns one of the docrepo codes for the ivoa WGs. - - This will look at wg_string only if self.group isn't already - set (in which case self.group is simply returned); this allows - overriding the WG name in the Makefile if necessary. - """ - if self.group: - return self.group - if wg_string not in self._wg_mapping.keys(): - raise ReportableError("ivoagroup must be one of %s. If this is" - " really inappropriate, set IVOA_GROUP = No Group in the Makefile"% - ", ".join(self._wg_mapping.keys())) - return self._wg_mapping[wg_string] - @staticmethod def _parse_DOCVERSION(version_string): """helps from_makefile by returning form keys from the document version. @@ -213,21 +176,22 @@ def _parse_DOCVERSION(version_string): if not mat: raise ReportableError("DOCVERSION in Makefile (%s) garbled."% version_string) - yield "docver1", mat.group(1) - yield "docver2", mat.group(2) + yield "version_major", mat.group(1) + yield "version_minor", mat.group(2) @staticmethod def _parse_DOCDATE(date_string): """helps from_makefile by returning form keys from the document date. + + (actually, in the new docrepo we don't need to parse; but + we still do some basic format validation. """ mat = re.match("(\d\d\d\d)-(\d\d)-(\d\d)", date_string) if not mat: raise ReportableError("DOCDATE in Makefile (%s) garbled."% date_string) - yield "year", mat.group(1) - yield "month", mat.group(2) - yield "day", mat.group(3) + yield "date", mat.group() @staticmethod def _parse_AUTHOR_EMAIL(email_string): @@ -254,23 +218,59 @@ def review_and_comment(document_meta): pprint.pprint(document_meta.get_post_payload()) print("-----------------------\n") - print("Going to upload %s\n"%document_meta.doctitle) + print("Going to upload %s\n"%document_meta.title) print("*** Version: %s.%s, %s of %s ***\n"%( - document_meta.docver1, - document_meta.docver2, - document_meta.doctype, - document_meta.get_date())) + document_meta.version_major, + document_meta.version_minor, + document_meta.status, + document_meta.date)) print("Hit ^C if this (or anthing in the dict above) is wrong," " enter to upload.") input() -def main(archive_file_name): +def validate_manifest(archive_file_name): + """raises a ReportableError if we notice anything is wrong with the + MANIFEST. + """ + with zipfile.ZipFile(archive_file_name) as archive: + # strip off the directory part, since that is not part of + # the manifest paths. + members = [n.split("/", 1)[-1] for n in archive.namelist()] + + with open("MANIFEST") as f: + for line_no, line in enumerate(f): + if line.startswith("#") or not line.strip(): + continue + try: + anchor, doctype, path = [s.strip() for s in line.split(";")] + if not path in members: + raise ReportableError( + f"MANIFEST: Missing file in line:{line_no+1}: {path}") + + if doctype not in ["document", "schema"]: + raise ReportableError( + f"MANIFEST: Bad doctype {doctype}" + f" in line:{line_no+1}: {path}") + + except Exception as ex: + raise ReportableError( + f"MANIFEST: bad syntax in line {line_no+1} ({ex})") + + +def main(archive_file_name, dry_run): document_meta = DocumentMeta.from_makefile() document_meta.add_info_from_document() + validate_manifest(archive_file_name) review_and_comment(document_meta) - sys.stdout.write("Uploading... ") - sys.stdout.flush() + print("Uploading... ", end="", flush=True) + + if dry_run: + with open("submission-payload.txt", "w", encoding="utf-8"): + f.write("\n".join(f"{k} {v}" for k, v in + sorted(document_meta.get_post_payload().items()))+"\n") + print("*** Aborted since --dry-run was passed.") + return with open(sys.argv[1], "rb") as upload: resp = requests.post(DOCREPO_URL, @@ -283,11 +283,18 @@ def main(archive_file_name): if __name__=="__main__": + import argparse + parser = argparse.ArgumentParser( + description="Upload an IVOA document") + parser.add_argument("pkgname", help="Name of the archive to upload.") + parser.add_argument("--dry-run", action="store_true", + dest="dry_run", help="Only do local actions, but do no http requests.") + args = parser.parse_args() + try: - if len(sys.argv)!=2: - raise ReportableError( - "Usage: %s "%sys.argv[0]) - main(sys.argv[1]) + main(args.pkgname, args.dry_run) except ReportableError as msg: sys.stderr.write("*** Failure while preparing submission:\n") sys.exit(msg) + +# vim:sta:sw=4:et From 5c49d1fec8dd753df542acfb6cbb7d6dc0e17d13 Mon Sep 17 00:00:00 2001 From: Markus Demleitner Date: Tue, 19 Nov 2024 08:06:28 +0100 Subject: [PATCH 3/3] Adding upload regression tests --- Makefile | 2 +- regressiontest/run-regression.py | 69 +++++++++++++++++++++++++++++--- submission.py | 43 ++++++++++++++++++-- 3 files changed, 103 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 9cc4adf..1ebbb0c 100644 --- a/Makefile +++ b/Makefile @@ -198,7 +198,7 @@ MANIFEST.in: ivoatex/MANIFEST.in if [ ! -f MANIFEST.in ]; then cp $^ $@; fi MANIFEST: MANIFEST.in Makefile - sed -e "s/{html-doc}/$(versionedName).tex/;\ + sed -e "s/{html-doc}/$(versionedName).html/;\ s/{pdf-doc}/$(versionedName).pdf/;\ s/^#.*/# Do not edit this file. Edit MANIFEST.in instead./" $< > $@ diff --git a/regressiontest/run-regression.py b/regressiontest/run-regression.py index 4934c92..7e970f8 100755 --- a/regressiontest/run-regression.py +++ b/regressiontest/run-regression.py @@ -57,7 +57,7 @@ def _assert_has(assertion, found_str, where): assert False, f"Cannot understand assertion: {assertion}" -def execute(cmd, check_output=None, input=None): +def execute(cmd, check_output=None, input=None, expected_status=0): """execute a subprocess.Popen-compatible command cmd under supervision. Specifically, we run with shell=True so the ivoatexDoc recipes work as @@ -67,9 +67,10 @@ def execute(cmd, check_output=None, input=None): check_stdout is not met. For now, that is: neither stdout nor stderr contains a string. """ - output = subprocess.check_output(cmd, shell=True, input=input, - stderr=subprocess.STDOUT) - output = output.decode("utf-8") + proc = subprocess.run(cmd, shell=True, input=input, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + assert proc.returncode == expected_status + output = proc.stdout.decode("utf-8") if check_output is not None: with open("last-output.txt", "w", encoding="utf-8") as f: f.write(output) @@ -307,7 +308,7 @@ def test_auxiliaryurl_and_test(): (r"\section{Normative Nonsense}", "\\section{Normative Nonsense}\n" "See (\\auxiliaryurl{our-instance.xml}) for details.")]) edit_file("Makefile", [ - ('AUX_FILES =', 'AUX_FILES = our-schema.xml')]) + ('AUX_FILES =', 'AUX_FILES = our-instance.xml')]) with open("our-instance.xml", "w") as f: f.write( """ @@ -471,6 +472,58 @@ def test_REC_material(): "has been endorsed by the IVOA Executive Committee") +def test_submission(): + execute("make fakeupload") + assert_in_file("submission-payload.txt", + "group_name Standards and Processes\n", + "title Regression test\n") + + # submission.py at this point parses its metadata out of the + # built html in the repo, *not* the one in the zip. If this + # changes, we have to change these tests, too + os.unlink("submission-payload.txt") + edit_file("Regress.html", [ + ('
Standards and Processes
', + '
Break and Crash
'),]) + + execute( + "python ivoatex/submission.py --dry-run NOTE-Regress-1.0-20230201.zip", + expected_status=1, + check_output="Break and Crash is it an IVOA WG/IG name we recognise.") + os.unlink("Regress.html") + + +def test_manifest(): + execute("cp ivoatex/MANIFEST.in MANIFEST.in") + with open("MANIFEST.in", "a+") as f: + f.write("\nsample;document;my_sample.dat\n") + execute("make fakeupload", expected_status=2, + check_output="MANIFEST: Missing file in line 12: my_sample.dat") + + with open("my_sample.dat", "w") as f: + f.write("abc") + edit_file("Makefile", [("AUX_FILES =", "AUX_FILES = my_sample.dat ")]) + edit_file("MANIFEST.in", [("sample;document;", "sample;dcuoment;")]) + execute("make fakeupload", expected_status=2, + check_output="MANIFEST: Bad doctype dcuoment in line:12") + + edit_file("MANIFEST.in", [("sample;dcuoment;", "sample;document;")]) + execute("make fakeupload") + + # the first line of submission-payload is the name of the upload + # file + with open("submission-payload.txt") as f: + zip_name = next(f).strip() + base_name = zip_name[:-4] + + assert_in_file("MANIFEST", + f"html;document;{base_name}.html", + f"pdf;document;{base_name}.pdf", + "sample;document;my_sample.dat") + + execute(f"unzip -l {zip_name}", check_output="/MANIFEST") + + def run_tests(repo_url, branch_name): os.environ["DOCNAME"] = "Regress" execute("mkdir $DOCNAME") @@ -518,7 +571,11 @@ def run_tests(repo_url, branch_name): test_all_bibliography() - test_REC_material() + test_REC_material() + + test_submission() + + test_manifest() # run_shell() diff --git a/submission.py b/submission.py index cedb389..492c655 100644 --- a/submission.py +++ b/submission.py @@ -48,6 +48,26 @@ DOCREPO_URL = 'http://testingdocrepo.ivoa.info/new_doc' +KNOWN_GROUPS = [ + # this list is from ivoatexDoc. I'm hoping to convince the docrepo folks + # to use it, too. + "Applications", + "Data Access Layer", + "Data Models", + "Grid and Web Services", + "Registry", + "Data Curation and Preservation", + "Standards and Processes", + "Semantics", + "Operations", + "Radio", + "Theory", + "VO Event", + "Time Domain", + "Education", + "No Group", + ] + class ReportableError(Exception): """raise this with a human-readable error message to cause a non-traceback @@ -128,6 +148,13 @@ def add_info_from_document(self): elif el.get("class")=="editor": self._editors.append(to_text(el)) + def validate(self): + # TODO: add more tests here + if self.group_name not in KNOWN_GROUPS: + raise ReportableError(f"{self.group_name} is it an IVOA" + " WG/IG name we recognise. See ivoatexDoc for a" + " list of recognised strings.") + @property def authors(self): return ", ".join(self._authors) @@ -246,12 +273,15 @@ def validate_manifest(archive_file_name): anchor, doctype, path = [s.strip() for s in line.split(";")] if not path in members: raise ReportableError( - f"MANIFEST: Missing file in line:{line_no+1}: {path}") + f"MANIFEST: Missing file in line {line_no+1}: {path}") if doctype not in ["document", "schema"]: raise ReportableError( f"MANIFEST: Bad doctype {doctype}" - f" in line:{line_no+1}: {path}") + f" in line:{line_no+1}") + + except ReportableError: + raise except Exception as ex: raise ReportableError( @@ -261,12 +291,17 @@ def validate_manifest(archive_file_name): def main(archive_file_name, dry_run): document_meta = DocumentMeta.from_makefile() document_meta.add_info_from_document() + document_meta.validate() validate_manifest(archive_file_name) - review_and_comment(document_meta) + if not dry_run: + # dry_run is used in regression testing, and we don't want to + # fake input there + review_and_comment(document_meta) print("Uploading... ", end="", flush=True) if dry_run: - with open("submission-payload.txt", "w", encoding="utf-8"): + with open("submission-payload.txt", "w", encoding="utf-8") as f: + f.write(archive_file_name+"\n") f.write("\n".join(f"{k} {v}" for k, v in sorted(document_meta.get_post_payload().items()))+"\n") print("*** Aborted since --dry-run was passed.")