Skip to content

Commit

Permalink
arxiv: use arxiv.org urls
Browse files Browse the repository at this point in the history
  • Loading branch information
DonHaul committed Oct 29, 2024
1 parent ff47b1c commit 4b9cce7
Show file tree
Hide file tree
Showing 19 changed files with 62 additions and 61 deletions.
4 changes: 2 additions & 2 deletions docs/e2e_tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ current test session. Many of them (``test-indexer``, ``test-web-e2e.local``) ar
not recorded. You might notice a few requests to ArXiv like so:

* ``GET http://export.arxiv.org/oai2?verb=GetRecord&metadataPrefix=arXiv&identifier=oai...``
* ``GET http://export.arxiv.org/pdf/1806.04664``
* ``GET http://export.arxiv.org/e-print/1806.04664``
* ``GET https://arxiv.org/pdf/1806.04664``
* ``GET https://arxiv.org/e-print/1806.04664``

These are live interactions that are recorded; you can find them in
``tests/e2e/scenarios/arxiv_in_hp/ArxivService/``. If you need to re-record an interaction, simply
Expand Down
6 changes: 3 additions & 3 deletions inspirehep/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,9 +1232,9 @@
"CERN": "CDS Hidden",
"FERMILAB": "Fermilab"
}
ARXIV_PDF_URL = "http://export.arxiv.org/pdf/{arxiv_id}"
ARXIV_PDF_URL_ALTERNATIVE = "http://arxiv.org/pdf/{arxiv_id}"
ARXIV_TARBALL_URL = "http://export.arxiv.org/e-print/{arxiv_id}"
ARXIV_PDF_URL = "https://arxiv.org/pdf/{arxiv_id}"
ARXIV_PDF_URL_ALTERNATIVE = "https://export.arxiv.org/pdf/{arxiv_id}"
ARXIV_TARBALL_URL = "https://arxiv.org/e-print/{arxiv_id}"

ARXIV_CATEGORIES = {
'core': [
Expand Down
1 change: 1 addition & 0 deletions services.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ version: '2.1'

services:
base:
platform: linux/amd64
image: inspire-build/inspire-next-base
tty: true
build:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ request:
Connection: [keep-alive]
User-Agent: [python-requests/2.18.4]
method: GET
url: http://export.arxiv.org/e-print/1404.0579
url: https://arxiv.org/e-print/1404.0579
response:
body: !!binary |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ request:
Connection: [keep-alive]
User-Agent: [python-requests/2.18.4]
method: GET
url: http://export.arxiv.org/pdf/1412.0200
url: https://arxiv.org/pdf/1412.0200
response:
body: !!binary |

Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ request:
Connection: [keep-alive]
User-Agent: [python-requests/2.18.4]
method: GET
url: http://export.arxiv.org/e-print/1404.0579
url: https://arxiv.org/e-print/1404.0579
response:
body: !!binary |
H4sICDDSqFoCA3JldmlzZWQxNDAzMTgudGV4AOz9fWPbRpYmjv5fnwLdg0RkTCqS4nR33KP5Xdtp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ request:
Connection: [keep-alive]
User-Agent: [python-requests/2.18.4]
method: GET
url: http://export.arxiv.org/pdf/1404.0579
url: https://arxiv.org/pdf/1404.0579
response:
body: !!binary |
JVBERi0xLjQKJcfsj6IKNSAwIG9iago8PC9MZW5ndGggNiAwIFIvRmlsdGVyIC9GbGF0ZURlY29k
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ request:
Connection: [keep-alive]
User-Agent: [python-requests/2.18.4]
method: GET
url: http://export.arxiv.org/e-print/1806.05669
url: https://arxiv.org/e-print/1806.05669
response:
body: !!binary |
H4sICDDSqFoCA3JldmlzZWQxNDAzMTgudGV4AOz9fWPbRpYmjv5fnwLdg0RkTCqS4nR33KP5Xdtp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ request:
Connection: [keep-alive]
User-Agent: [python-requests/2.18.4]
method: GET
url: http://export.arxiv.org/pdf/1806.05669
url: https://arxiv.org/pdf/1806.05669
response:
body: !!binary |
JVBERi0xLjQKJcfsj6IKNSAwIG9iago8PC9MZW5ndGggNiAwIFIvRmlsdGVyIC9GbGF0ZURlY29k
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@
"filename":"2208.00828.pdf",
"fulltext":true,
"material":"preprint",
"original_url":"http://export.arxiv.org/pdf/2208.00828"
"original_url":"https://arxiv.org/pdf/2208.00828"
}
],
"references":[
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/workflows/helpers/mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

def fake_download_file(workflow, name, url):
"""Mock download_file_to_workflow func."""
if url == 'http://export.arxiv.org/e-print/1407.7587':
if url == 'https://arxiv.org/e-print/1407.7587':
workflow.files[name] = pkg_resources.resource_stream(
__name__,
os.path.join(
Expand All @@ -39,7 +39,7 @@ def fake_download_file(workflow, name, url):
)
)
return workflow.files[name]
elif url == 'http://export.arxiv.org/pdf/1407.7587':
elif url == 'https://arxiv.org/pdf/1407.7587':
workflow.files[name] = pkg_resources.resource_stream(
__name__,
os.path.join(
Expand Down
8 changes: 4 additions & 4 deletions tests/integration/workflows/test_article_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def test_run_next_wf_is_not_starting_core_selection_wfs(
workflow = build_workflow(record, extra_data={"delay": 10})
mocked_external_services.register_uri(
"GET",
"http://export.arxiv.org/pdf/1802.08709.pdf",
"https://arxiv.org/pdf/1802.08709.pdf",
content=pkg_resources.resource_string(
__name__, os.path.join("fixtures", "1802.08709.pdf")
),
Expand All @@ -398,7 +398,7 @@ def test_run_next_wf_is_not_starting_core_selection_wfs(
)
mocked_external_services.register_uri(
"GET",
"http://export.arxiv.org/e-print/1802.08709.pdf",
"https://arxiv.org/e-print/1802.08709.pdf",
content=pkg_resources.resource_string(
__name__, os.path.join("fixtures", "1802.08709.pdf")
),
Expand Down Expand Up @@ -436,7 +436,7 @@ def test_run_next_wf_is_not_starting_core_selection_wfs(

mocked_external_services.register_uri(
"GET",
"http://export.arxiv.org/pdf/1802.08709.pdf",
"https://arxiv.org/pdf/1802.08709.pdf",
content=pkg_resources.resource_string(
__name__, os.path.join("fixtures", "1802.08709.pdf")
),
Expand All @@ -446,7 +446,7 @@ def test_run_next_wf_is_not_starting_core_selection_wfs(
)
mocked_external_services.register_uri(
"GET",
"http://export.arxiv.org/e-print/1802.08709.pdf",
"https://arxiv.org/e-print/1802.08709.pdf",
content=pkg_resources.resource_string(
__name__, os.path.join("fixtures", "1802.08709.pdf")
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/pdf/1407.7587
uri: https://arxiv.org/pdf/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -5419,7 +5419,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/e-print/1407.7587
uri: https://arxiv.org/e-print/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -8590,7 +8590,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/pdf/1407.7587
uri: https://arxiv.org/pdf/1407.7587
response:
body:
string: !!binary |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/pdf/1407.7587
uri: https://arxiv.org/pdf/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -5419,7 +5419,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/e-print/1407.7587
uri: https://arxiv.org/e-print/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -8590,7 +8590,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/pdf/1407.7587
uri: https://arxiv.org/pdf/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -19481,7 +19481,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/pdf/1407.7587
uri: https://arxiv.org/pdf/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -24894,7 +24894,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/e-print/1407.7587
uri: https://arxiv.org/e-print/1407.7587
response:
body:
string: !!binary |
Expand Down Expand Up @@ -28065,7 +28065,7 @@ interactions:
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
method: GET
uri: http://export.arxiv.org/pdf/1407.7587
uri: https://arxiv.org/pdf/1407.7587
response:
body:
string: !!binary |
Expand Down
34 changes: 17 additions & 17 deletions tests/unit/workflows/test_workflows_tasks_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def _get_auto_reject_obj(decision, has_core_keywords, fulltext_used):
def test_download_documents():
with requests_mock.Mocker() as requests_mocker:
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1605.03844',
'GET', 'https://arxiv.org/pdf/1605.03844',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1605.03844.pdf')),
)
Expand All @@ -90,7 +90,7 @@ def test_download_documents():
'documents': [
{
'key': '1605.03844.pdf',
'url': 'http://export.arxiv.org/pdf/1605.03844'
'url': 'https://arxiv.org/pdf/1605.03844'
},
],
} # literature/1458302
Expand All @@ -113,12 +113,12 @@ def test_download_documents():
def test_download_documents_with_multiple_documents():
with requests_mock.Mocker() as requests_mocker:
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1605.03844',
'GET', 'https://arxiv.org/pdf/1605.03844',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1605.03844.pdf')),
)
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1605.03845',
'GET', 'https://arxiv.org/pdf/1605.03845',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1605.03844.pdf')),
)
Expand All @@ -130,11 +130,11 @@ def test_download_documents_with_multiple_documents():
'documents': [
{
'key': '1605.03844.pdf',
'url': 'http://export.arxiv.org/pdf/1605.03844'
'url': 'https://arxiv.org/pdf/1605.03844'
},
{
'key': '1605.03845.pdf',
'url': 'http://export.arxiv.org/pdf/1605.03845'
'url': 'https://arxiv.org/pdf/1605.03845'
},
],
} # literature/1458302
Expand Down Expand Up @@ -869,7 +869,7 @@ def test_populate_journal_coverage_does_nothing_if_no_journal_is_found(mock_repl
def test_populate_submission_document():
with requests_mock.Mocker() as requests_mocker:
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1605.03844',
'GET', 'https://arxiv.org/pdf/1605.03844',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1605.03844.pdf')),
)
Expand All @@ -889,7 +889,7 @@ def test_populate_submission_document():
},
}
extra_data = {
'submission_pdf': 'http://export.arxiv.org/pdf/1605.03844',
'submission_pdf': 'https://arxiv.org/pdf/1605.03844',
}
files = MockFiles({})
assert validate(data['acquisition_source'], subschema) is None
Expand All @@ -903,9 +903,9 @@ def test_populate_submission_document():
{
'fulltext': True,
'key': 'fulltext.pdf',
'original_url': 'http://export.arxiv.org/pdf/1605.03844',
'original_url': 'https://arxiv.org/pdf/1605.03844',
'source': 'submitter',
'url': 'http://export.arxiv.org/pdf/1605.03844',
'url': 'https://arxiv.org/pdf/1605.03844',
},
]
result = obj.data['documents']
Expand All @@ -916,7 +916,7 @@ def test_populate_submission_document():
def test_populate_submission_document_does_not_duplicate_documents():
with requests_mock.Mocker() as requests_mocker:
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1605.03844',
'GET', 'https://arxiv.org/pdf/1605.03844',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1605.03844.pdf')),
)
Expand All @@ -936,7 +936,7 @@ def test_populate_submission_document_does_not_duplicate_documents():
},
}
extra_data = {
'submission_pdf': 'http://export.arxiv.org/pdf/1605.03844',
'submission_pdf': 'https://arxiv.org/pdf/1605.03844',
}
files = MockFiles({})
assert validate(data['acquisition_source'], subschema) is None
Expand All @@ -951,9 +951,9 @@ def test_populate_submission_document_does_not_duplicate_documents():
{
'fulltext': True,
'key': 'fulltext.pdf',
'original_url': 'http://export.arxiv.org/pdf/1605.03844',
'original_url': 'https://arxiv.org/pdf/1605.03844',
'source': 'submitter',
'url': 'http://export.arxiv.org/pdf/1605.03844',
'url': 'https://arxiv.org/pdf/1605.03844',
},
]
result = obj.data['documents']
Expand All @@ -964,7 +964,7 @@ def test_populate_submission_document_does_not_duplicate_documents():
def test_populate_submission_document_without_pdf():
with requests_mock.Mocker() as requests_mocker:
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1707.02785',
'GET', 'https://arxiv.org/pdf/1707.02785',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1707.02785.html')),
)
Expand All @@ -985,7 +985,7 @@ def test_populate_submission_document_without_pdf():
assert validate(data['acquisition_source'], subschema) is None

extra_data = {
'submission_pdf': 'http://export.arxiv.org/pdf/1707.02785',
'submission_pdf': 'https://arxiv.org/pdf/1707.02785',
}
files = MockFiles({})
obj = MockObj(data, extra_data, files=files)
Expand Down Expand Up @@ -1492,7 +1492,7 @@ def test_url_is_correctly_escaped():
def test_populate_submission_document_without_documents():
with requests_mock.Mocker() as requests_mocker:
requests_mocker.register_uri(
'GET', 'http://export.arxiv.org/pdf/1605.03844',
'GET', 'https://arxiv.org/pdf/1605.03844',
content=pkg_resources.resource_string(
__name__, os.path.join('fixtures', '1605.03844.pdf')),
)
Expand Down
Loading

0 comments on commit 4b9cce7

Please sign in to comment.