Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apiharvester: cantook harvesting #3802

Open
wants to merge 1 commit into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions data/apisources.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019 RERO
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -16,8 +16,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.


# OAI-PMH harvester configuration.
mef:
url: http://mef.test.rero.ch/api/mef
comment: 'mef persons'
size: 1000
# API harvester configuration.
VS-CANTOOK:
url: https://mediatheque-valais.cantookstation.eu
classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
code: 'mv-cantook'

NJ-CANTOOK:
url: https://bm.ebibliomedia.ch
classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
code: 'ebibliomedia'
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ reverse = "rero_ils.dojson.cli:reverse"
pjson = "rero_ils.dojson.cli:pretty_json_dump"

[tool.poetry.plugins."dojson.cli.rule"]
marc21_ebooks_to_json = "rero_ils.modules.ebooks.dojson.contrib.marc21:marc21"
marc21_dnb_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_dnb"
marc21_kul_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_kul"
marc21_loc_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_loc"
Expand Down Expand Up @@ -223,11 +222,10 @@ users = "rero_ils.modules.users.views:blueprint"
nooppid = "rero_ils.converters:NoopPIDConverter"

[tool.poetry.plugins."invenio_celery.tasks"]
apiharvester = "rero_ils.modules.apiharvester.tasks"
api_harvester = "rero_ils.modules.api_harvester.tasks"
collections = "rero_ils.modules.collections.tasks"
documents = "rero_ils.modules.documents.tasks"
remote_entities = "rero_ils.modules.entities.remote_entities.tasks"
ebooks = "rero_ils.modules.ebooks.tasks"
holdings = "rero_ils.modules.holdings.tasks"
items = "rero_ils.modules.items.tasks"
loans = "rero_ils.modules.loans.tasks"
Expand All @@ -252,7 +250,7 @@ acq_order_lines = "rero_ils.modules.acquisition.acq_order_lines.models"
acq_orders = "rero_ils.modules.acquisition.acq_orders.models"
acq_receipt_lines = "rero_ils.modules.acquisition.acq_receipt_lines.models"
acq_receipts = "rero_ils.modules.acquisition.acq_receipts.models"
apiharvester = "rero_ils.modules.apiharvester.models"
api_harvester = "rero_ils.modules.api_harvester.models"
budgets = "rero_ils.modules.acquisition.budgets.models"
circ_policies = "rero_ils.modules.circ_policies.models"
collections = "rero_ils.modules.collections.models"
Expand Down
24 changes: 12 additions & 12 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,6 @@ def _(x):
"schedule": timedelta(minutes=60),
"enabled": False,
},
"ebooks-harvester": {
"task": "invenio_oaiharvester.tasks.list_records_from_dates",
"schedule": crontab(minute=22, hour=22),
"kwargs": {"name": "ebooks"},
"enabled": False,
},
"notification-creation": {
"task": "rero_ils.modules.notifications.tasks.create_notifications",
"schedule": crontab(minute=0, hour=3), # Every day at 05:00 UTC,
Expand Down Expand Up @@ -526,12 +520,18 @@ def _(x):
"kwargs": {"delete": True},
"enabled": False,
},
# "mef-harvester": {
# "task": "rero_ils.modules.apiharvester.tasks.harvest_records",
# "schedule": timedelta(minutes=60),
# "kwargs": {"name": "mef", "enabled": False),
# "enabled": False,
# },
"harvest-vs-cantook": {
"task": "rero_ils.modules.api_harvester.tasks.harvest_records",
"schedule": crontab(minute=33, hour=3), # Every day at 03:33 UTC,
"kwargs": {"name": "VS-CANTOOK"},
"enabled": False,
},
"harvest-nj-cantook": {
"task": "rero_ils.modules.api_harvester.tasks.harvest_records",
"schedule": crontab(minute=44, hour=4), # Every day at 04:44 UTC,
"kwargs": {"name": "NJ-CANTOOK"},
"enabled": False,
},
}

CELERY_BROKER_HEARTBEAT = 0
Expand Down
159 changes: 159 additions & 0 deletions rero_ils/modules/api_harvester/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Base API for harvesting records from external API sources."""

from __future__ import absolute_import, print_function

import click

from rero_ils.modules.api_harvester.models import ApiHarvestConfig
from rero_ils.modules.locations.api import Location
from rero_ils.modules.organisations.api import Organisation


class ApiHarvest:
    """Base class to harvest records from a configured API source.

    Subclasses have to implement :meth:`get_request_url` and
    :meth:`create_update_record`; the base implementations raise
    ``NotImplementedError``.

    :param name: name of the stored ``ApiHarvestConfig`` to use
    :param file_name: writer used to save harvested records. Despite its
        name it is not a path: it must expose a ``write(record)`` method
        (see :meth:`save_record`)
    :param process: create or update the harvested records
    :param harvest_count: maximum number of records to harvest; a negative
        value means no limit
    :param verbose: print verbose messages
    :raises NameError: if no configuration exists for ``name``
    """

    def __init__(
        self, name, file_name=None, process=False, harvest_count=-1, verbose=False
    ):
        """Class init."""
        config = self.get_config(name)
        if not config:
            raise NameError(f"API Config not found: {name}")
        self.config = config
        self.file = file_name
        self.process = process
        self.harvest_count = harvest_count
        self.verbose = verbose
        # Not set here; only used for log messages (see ``msg_text``).
        self._vendor = None
        self._url = self.config.url
        self._code = self.config.code
        self._count = 0
        self._count_new = 0
        self._count_upd = 0
        self._count_del = 0
        # ``_info`` maps organisation pid -> {
        #     "item_type_pid": <online circulation category>,
        #     "locations": {<location pid>: <harvested source URL or None>},
        # }
        info = {}
        for organisation in Organisation.get_records_by_online_harvested_source(
            self._code
        ):
            locations = {}
            for location_pid in organisation.get_online_locations():
                # Default: no URL known for this location.
                locations[location_pid] = None
                location = Location.get_record_by_pid(location_pid)
                if location is None:
                    # NOTE(review): presumably ``get_record_by_pid`` returns
                    # None for an unknown pid - confirm. Keep the None URL
                    # instead of failing on ``None.get_library()``.
                    continue
                library = location.get_library()
                if url := library.get_online_harvested_source_url(source=self._code):
                    locations[location_pid] = url
            info[organisation.pid] = {
                "item_type_pid": organisation.online_circulation_category(),
                "locations": locations,
            }
        self._info = info

    @classmethod
    def get_config(cls, name):
        """Get config.

        :param name: name of config
        :returns: first ``ApiHarvestConfig`` matching ``name`` or None
        """
        return ApiHarvestConfig.query.filter_by(name=name).first()

    def get_request_url(self, start_date="1990-01-01", page=1):
        """Get request URL.

        Has to be implemented by subclasses.

        :param start_date: date from which records have to be harvested
        :param page: page from which records have to be harvested
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def create_update_record(self, record):
        """Create new record or update record.

        Has to be implemented by subclasses. ``process_records`` unpacks the
        result as ``(pid, status)`` and reads ``status.value``.

        :param record: record to create or update
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def save_record(self, record):
        """Save record to file.

        Does nothing if no writer was given at init.

        :param record: record to write to file
        """
        if self.file:
            self.file.write(record)

    def msg_text(self, pid, msg):
        """Logging message text.

        :param pid: pid of the processed record
        :param msg: message (status) to log for the record
        :returns: formatted message including count, vendor and source code
        """
        return f"{self._count}: {self._vendor}:{self._code} {pid} = {msg}"

    def process_records(self, records):
        """Process records.

        Every record is counted and saved; it is created or updated only if
        ``process`` is set. Processing stops as soon as ``harvest_count``
        records have been handled (no limit if ``harvest_count`` is negative).

        :param records: records to process
        """
        for record in records:
            if self.harvest_count >= 0 and self._count >= self.harvest_count:
                break
            self._count += 1
            self.save_record(record)
            if self.process:
                pid, status = self.create_update_record(record)
                self.verbose_print(self.msg_text(pid=pid, msg=status.value))

    def verbose_print(self, msg):
        """Print verbose message.

        :param msg: message to print if verbose
        """
        if self.verbose:
            click.echo(msg)

    def harvest_records(self, from_date):
        """Harvest records from servers.

        This base implementation harvests nothing: it processes an empty
        record list and does not use ``from_date``.

        :param from_date: records changed after this date to harvest
        :returns: tuple of (processed record count, harvested record count)
        """
        records = []
        self.process_records(records=records)
        return self._count, len(records)

    @property
    def count(self):
        """Get count."""
        return self._count

    @property
    def count_new(self):
        """Get new count."""
        return self._count_new

    @property
    def count_upd(self):
        """Get updated count."""
        return self._count_upd

    @property
    def count_del(self):
        """Get deleted count."""
        return self._count_del
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2022 RERO
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -15,4 +15,4 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""JSON schemas."""
"""ApiCantook."""
Loading
Loading