diff --git a/test/resources/report.pdf b/test/resources/report.pdf new file mode 100644 index 0000000..9d83e03 Binary files /dev/null and b/test/resources/report.pdf differ diff --git a/test/resources/report1.pdf b/test/resources/report1.pdf deleted file mode 100644 index f2f9284..0000000 Binary files a/test/resources/report1.pdf and /dev/null differ diff --git a/test/resources/report2.pdf b/test/resources/report2.pdf deleted file mode 100644 index 2e034ee..0000000 Binary files a/test/resources/report2.pdf and /dev/null differ diff --git a/test/test_infopage.py b/test/test_infopage.py index 8cc5821..ba73f6b 100644 --- a/test/test_infopage.py +++ b/test/test_infopage.py @@ -4,7 +4,7 @@ from metadata_extract.infopage import InfoPage from metadata_extract.meteor_document import MeteorDocument -doc = MeteorDocument('test/resources/report2.pdf') +doc = MeteorDocument('test/resources/report.pdf') infopagenr = InfoPage.find_page_number(doc.pages) @@ -17,7 +17,7 @@ def test_infopagenr(): def test_find_title(): - expected_title = 'Muligheter og utfordringer for økt karbonbinding i jordbruksjord' + expected_title = 'Metadataekstrahering – Muligheter og innsikt' assert test_infopage.find_title() == expected_title @@ -30,5 +30,5 @@ def test_find_isxn(): def test_find_authors(): authors = test_infopage.find_author() - assert set(authors) == {'Daniel Rasse', 'Inghild Økland', 'Teresa G. Bárcena', - 'Hugh Riley', 'Vegard Martinsen', 'Ievina Sturite'} + assert set(authors) == {'Bjørnstjerne M. Bjørnson', 'Jacobine Camilla-Collett', + 'Henrik J. Ibsen', 'Raymond McArthur', 'John O'} diff --git a/test/test_pdf.py b/test/test_pdf.py index 9ec09f6..9a82409 100644 --- a/test/test_pdf.py +++ b/test/test_pdf.py @@ -5,12 +5,12 @@ meteor = Meteor() -results = meteor.run('test/resources/report1.pdf') +results = meteor.run('test/resources/report.pdf') def test_year(): assert results['year'] == { - "value": 2021, + "value": 2023, "origin": { "type": "COPYRIGHT", "pageNumber": 4 @@ -29,17 +29,16 @@ def test_language(): def test_title(): assert results['title'] == { - "value": "Barnefaglig kompetanse i utlendingsforvaltningen", + "value": "Metadataekstrahering – Muligheter og innsikt", "origin": { - "type": "PDFINFO", - "pageNumber": 1 + "type": "FRONT_PAGE" } } def test_publisher(): assert results['publisher'] == { - "value": "Fafo", + "value": "Nasjonalbiblioteket", "origin": { "type": "COPYRIGHT", "pageNumber": 4 @@ -59,24 +58,43 @@ def test_publication_type(): def test_authors(): expected_dict = [ { - "firstname": "Ragna", - "lastname": "Lillevik", + "firstname": "Bjørnstjerne M.", + "lastname": "Bjørnson", + "origin": { + "type": "INFO_PAGE", + "pageNumber": 2 + } + }, + { + "firstname": "Jacobine", + "lastname": "Camilla-Collett", "origin": { - "type": "FRONT_PAGE" + "type": "INFO_PAGE", + "pageNumber": 2 } }, { - "firstname": "Lene Christin", - "lastname": "Holum", + "firstname": "Henrik J.", + "lastname": "Ibsen", "origin": { - "type": "FRONT_PAGE" + "type": "INFO_PAGE", + "pageNumber": 2 } }, { - "firstname": "Nerina", - "lastname": "Weiss", + "firstname": "Raymond", + "lastname": "McArthur", "origin": { - "type": "FRONT_PAGE" + "type": "INFO_PAGE", + "pageNumber": 2 + } + }, + { + "firstname": "John", + "lastname": "O", + "origin": { + "type": "INFO_PAGE", + "pageNumber": 2 } } ] @@ -94,19 +112,19 @@ def test_authors(): def test_isbn(): assert results['isbn'] == { - "value": "978-82-324-0629-6", + "value": "978-82-17-02298-5", "origin": { "type": "PAGE", - "pageNumber": 4 + "pageNumber": 2 } } def test_issn(): assert results['issn'] == { - "value": "2387-6859", + "value": "2464-1162", "origin": { "type": "PAGE", - "pageNumber": 4 + "pageNumber": 2 } }