Skip to content

Commit

Permalink
apiharvester: cantook harvesting
Browse files Browse the repository at this point in the history
* Adds VS and NJ CANTOOK API harvesting.
* Deletes OAI ebooks harvesting.
* Closes #3718.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep committed Dec 17, 2024
1 parent e3039c2 commit 406f80e
Show file tree
Hide file tree
Showing 48 changed files with 2,461 additions and 2,529 deletions.
17 changes: 11 additions & 6 deletions data/apisources.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019 RERO
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -16,8 +16,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.


# OAI-PMH harvester configuration.
mef:
url: http://mef.test.rero.ch/api/mef
comment: 'mef persons'
size: 1000
# API harvester configuration.
#
# Each top-level key names one harvest source. The Celery beat entries
# "harvest-vs-cantook" and "harvest-nj-cantook" in rero_ils/config.py select
# a source by this name through their `name` keyword argument.
#
# Fields of an entry:
#   url:       URL of the remote server for this source.
#   classname: dotted path of the harvester class
#              (presumably imported by the loader; TODO confirm).
#   code:      code of the online harvested source; ApiHarvest uses it to look
#              up the matching organisations and the per-library source URLs.
VS-CANTOOK:
url: https://mediatheque-valais.cantookstation.eu
classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
code: 'mv-cantook'

NJ-CANTOOK:
url: https://bm.ebibliomedia.ch
classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
code: 'ebibliomedia'
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ reverse = "rero_ils.dojson.cli:reverse"
pjson = "rero_ils.dojson.cli:pretty_json_dump"

[tool.poetry.plugins."dojson.cli.rule"]
marc21_ebooks_to_json = "rero_ils.modules.ebooks.dojson.contrib.marc21:marc21"
marc21_dnb_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_dnb"
marc21_kul_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_kul"
marc21_loc_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_loc"
Expand Down Expand Up @@ -223,11 +222,10 @@ users = "rero_ils.modules.users.views:blueprint"
nooppid = "rero_ils.converters:NoopPIDConverter"

[tool.poetry.plugins."invenio_celery.tasks"]
apiharvester = "rero_ils.modules.apiharvester.tasks"
api_harvester = "rero_ils.modules.api_harvester.tasks"
collections = "rero_ils.modules.collections.tasks"
documents = "rero_ils.modules.documents.tasks"
remote_entities = "rero_ils.modules.entities.remote_entities.tasks"
ebooks = "rero_ils.modules.ebooks.tasks"
holdings = "rero_ils.modules.holdings.tasks"
items = "rero_ils.modules.items.tasks"
loans = "rero_ils.modules.loans.tasks"
Expand All @@ -252,7 +250,7 @@ acq_order_lines = "rero_ils.modules.acquisition.acq_order_lines.models"
acq_orders = "rero_ils.modules.acquisition.acq_orders.models"
acq_receipt_lines = "rero_ils.modules.acquisition.acq_receipt_lines.models"
acq_receipts = "rero_ils.modules.acquisition.acq_receipts.models"
apiharvester = "rero_ils.modules.apiharvester.models"
api_harvester = "rero_ils.modules.api_harvester.models"
budgets = "rero_ils.modules.acquisition.budgets.models"
circ_policies = "rero_ils.modules.circ_policies.models"
collections = "rero_ils.modules.collections.models"
Expand Down
24 changes: 12 additions & 12 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,6 @@ def _(x):
"schedule": timedelta(minutes=60),
"enabled": False,
},
"ebooks-harvester": {
"task": "invenio_oaiharvester.tasks.list_records_from_dates",
"schedule": crontab(minute=22, hour=22),
"kwargs": {"name": "ebooks"},
"enabled": False,
},
"notification-creation": {
"task": "rero_ils.modules.notifications.tasks.create_notifications",
"schedule": crontab(minute=0, hour=3), # Every day at 03:00 UTC,
Expand Down Expand Up @@ -526,12 +520,18 @@ def _(x):
"kwargs": {"delete": True},
"enabled": False,
},
# "mef-harvester": {
# "task": "rero_ils.modules.apiharvester.tasks.harvest_records",
# "schedule": timedelta(minutes=60),
# "kwargs": {"name": "mef", "enabled": False),
# "enabled": False,
# },
"harvest-vs-cantook": {
"task": "rero_ils.modules.api_harvester.tasks.harvest_records",
"schedule": crontab(minute=33, hour=3), # Every day at 03:33 UTC,
"kwargs": {"name": "VS-CANTOOK"},
"enabled": False,
},
"harvest-nj-cantook": {
"task": "rero_ils.modules.api_harvester.tasks.harvest_records",
"schedule": crontab(minute=44, hour=4), # Every day at 04:44 UTC,
"kwargs": {"name": "NJ-CANTOOK"},
"enabled": False,
},
}

CELERY_BROKER_HEARTBEAT = 0
Expand Down
File renamed without changes.
159 changes: 159 additions & 0 deletions rero_ils/modules/api_harvester/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Base API for harvesting records from external API sources."""

from __future__ import absolute_import, print_function

import click

from rero_ils.modules.api_harvester.models import ApiHarvestConfig
from rero_ils.modules.locations.api import Location
from rero_ils.modules.organisations.api import Organisation


class ApiHarvest:
    """Base class for harvesting records from a remote API source.

    The two source specific hooks, :meth:`get_request_url` and
    :meth:`create_update_record`, raise ``NotImplementedError`` here and are
    meant to be provided by subclasses.

    :param name: name of the stored ``ApiHarvestConfig`` to load
    :param file_name: optional writer object; when truthy, every harvested
        record is handed to its ``write`` method
    :param process: when true, create or update a record for every harvested
        record
    :param harvest_count: maximum number of records to handle; a negative
        value means no limit
    :param verbose: when true, echo progress messages
    """

    def __init__(
        self, name, file_name=None, process=False, harvest_count=-1, verbose=False
    ):
        """Load the named config and collect the online harvesting information.

        :param name: name of the stored config
        :param file_name: optional writer object for the harvested records
        :param process: create or update the harvested records
        :param harvest_count: how many records to harvest (negative: all)
        :param verbose: print verbose messages
        :raises NameError: if no config with the given name exists
        """
        stored_config = self.get_config(name)
        if not stored_config:
            raise NameError(f"API Config not found: {name}")
        self.config = stored_config
        self._url = self.config.url
        self._code = self.config.code
        self._vendor = None

        # Options supplied by the caller.
        self.file = file_name
        self.process = process
        self.harvest_count = harvest_count
        self.verbose = verbose

        # Harvest counters, all starting at zero.
        self._count = self._count_new = self._count_upd = self._count_del = 0

        # Maps organisation pid -> {"item_type_pid": ..., "locations": {...}}
        # where "locations" maps each online location pid to the harvested
        # source URL of its library, or None when the library has none.
        harvest_info = {}
        organisations = Organisation.get_records_by_online_harvested_source(
            self._code
        )
        for org in organisations:
            source_urls = {}
            for loc_pid in org.get_online_locations():
                library = Location.get_record_by_pid(loc_pid).get_library()
                source_url = library.get_online_harvested_source_url(
                    source=self._code
                )
                # A falsy URL is recorded as None.
                source_urls[loc_pid] = source_url or None
            org_entry = {
                "item_type_pid": org.online_circulation_category(),
                "locations": source_urls,
            }
            harvest_info[org.pid] = org_entry
        self._info = harvest_info

    @classmethod
    def get_config(cls, name):
        """Return the first stored harvest config with the given name.

        :param name: name of config
        :returns: the first ``ApiHarvestConfig`` row matching ``name``, as
            returned by the query's ``first()``
        """
        matching = ApiHarvestConfig.query.filter_by(name=name)
        return matching.first()

    def get_request_url(self, start_date="1990-01-01", page=1):
        """Get the request URL (not implemented in the base class).

        :param start_date: date from which records have to be harvested
        :param page: page from which records have to be harvested
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def create_update_record(self, record):
        """Create a new record or update an existing one (not implemented).

        :meth:`process_records` unpacks the result as ``(pid, status)`` and
        reads ``status.value``.

        :param record: record to create or update
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def save_record(self, record):
        """Write the record to the configured file, if there is one.

        :param record: record to write to file
        """
        sink = self.file
        if not sink:
            return
        sink.write(record)

    def msg_text(self, pid, msg):
        """Build the logging message text for one record.

        :param pid: pid of the record the message is about
        :param msg: message to append
        :returns: text made of the current count, vendor, code, pid and msg
        """
        return f"{self._count}: {self._vendor}:{self._code} {pid} = {msg}"

    def process_records(self, records):
        """Save and optionally create or update the given records.

        Stops as soon as a non negative ``harvest_count`` has been reached.

        :param records: records to process
        """
        for record in records:
            # A negative harvest_count disables the limit.
            if 0 <= self.harvest_count <= self._count:
                break
            self._count += 1
            self.save_record(record)
            if not self.process:
                continue
            pid, status = self.create_update_record(record)
            self.verbose_print(self.msg_text(pid=pid, msg=status.value))

    def verbose_print(self, msg):
        """Echo the message when verbose output is enabled.

        :param msg: message to print if verbose
        """
        if not self.verbose:
            return
        click.echo(msg)

    def harvest_records(self, from_date):
        """Harvest records from servers.

        This base implementation processes an empty record list.

        :param from_date: records changed after this date to harvest
            (unused in this base implementation)
        :returns: tuple of the current count and the number of harvested
            records
        """
        harvested = []
        self.process_records(records=harvested)
        return self._count, len(harvested)

    @property
    def count(self):
        """Number of records handled so far."""
        return self._count

    @property
    def count_new(self):
        """Value of the new records counter."""
        return self._count_new

    @property
    def count_upd(self):
        """Value of the updated records counter."""
        return self._count_upd

    @property
    def count_del(self):
        """Value of the deleted records counter."""
        return self._count_del
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2022 RERO
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -15,4 +15,4 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""JSON schemas."""
"""ApiCantook."""
Loading

0 comments on commit 406f80e

Please sign in to comment.