# setup.py for pybibframe (forked from uogbuji/pybibframe)
# -*- coding: utf-8 -*-
'''
Note: careful not to conflate install_requires with requirements.txt
https://packaging.python.org/discussions/install-requires-vs-requirements/
Reluctantly use setuptools to get install_requires & long_description_content_type
'''
import sys
from setuptools import setup
#from distutils.core import setup
# Project metadata constants passed to setup() at the bottom of this file.
PROJECT_NAME = 'pybibframe'
# Must be a plain string. A trailing comma after the literal would turn this
# into a 1-tuple, which is not a valid value for the ``description`` field.
PROJECT_DESCRIPTION = (
    'Python tools for BIBFRAME (Bibliographic Framework), a Web-friendly '
    'framework for bibliographic descriptions in libraries, for example.'
)
# NOTE(review): this is a trove classifier string rather than a short license
# name; confirm it is what the ``license`` field should carry.
PROJECT_LICENSE = 'License :: OSI Approved :: Apache Software License'
PROJECT_AUTHOR = 'Uche Ogbuji'
PROJECT_AUTHOR_EMAIL = '[email protected]'
PROJECT_MAINTAINER = 'Zepheira'
PROJECT_MAINTAINER_EMAIL = '[email protected]'
PROJECT_URL = 'http://zepheira.com/'
# Map the ``bibframe`` package onto the ``lib`` source directory.
PACKAGE_DIR = dict(bibframe='lib')

# The top-level package followed by each of its subpackages.
PACKAGES = ['bibframe'] + [
    'bibframe.' + subpackage
    for subpackage in ('reader', 'writer', 'contrib', 'plugin')
]

# Command-line scripts, all kept in the exec/ directory.
SCRIPTS = [
    'exec/' + tool
    for tool in ('marc2bf', 'versa2ttl', 'marcbin2xml')
]
#FIXME: Trim some of these as amara3-xml & versa setup.py files are updated to handle requirements
#See: requirements.txt
# Distributions passed to setup() as ``install_requires``.
CORE_REQUIREMENTS = [
    'amara3.iri',
    'amara3.xml',
    'pymarc',
    'rdflib',
    'pytest',
    'versa==0.5.2',
]

# The body of this conditional must be indented; without indentation the
# file fails to parse with an IndentationError.
if not hasattr(sys, 'pypy_version_info'):
    #See: requirements-pypy.txt vs requirements.txt
    #In PyPy case use the included pymmh3
    CORE_REQUIREMENTS.append('mmh3')
# Trove classifiers, taken from
# http://pypi.python.org/pypi?%3Aaction=list_classifiers
CLASSIFIERS = [
    # Language
    'Programming Language :: Python',
    'Programming Language :: Python :: 3',
    # Status, audience, license and platform
    'Development Status :: 5 - Production/Stable',
    'Intended Audience :: Information Technology',
    'License :: OSI Approved :: Apache Software License',
    'Operating System :: OS Independent',
    # Topics
    'Topic :: Software Development :: Libraries :: Python Modules',
    'Topic :: Internet :: WWW/HTTP',
    'Topic :: Database',
    'Topic :: Scientific/Engineering :: Information Analysis',
    'Topic :: Text Processing :: Markup :: XML',
    'Topic :: Text Processing :: Indexing',
    'Topic :: Utilities',
]
# Execute lib/version.py in this module's namespace; it is expected to define
# ``version_info``, which is joined below into the dotted version string.
version_file = 'lib/version.py'
# Read through a context manager so the file handle is closed promptly
# instead of being left for the garbage collector.
with open(version_file, 'rb') as version_fp:
    version_source = version_fp.read()
exec(compile(version_source, version_file, 'exec'), globals(), locals())
# str() each component so integer entries in version_info join cleanly too.
__version__ = '.'.join(str(part) for part in version_info)
# Long description passed to setup(), written in Markdown (see LONGDESC_CTYPE).
# Paragraphs are separated by blank lines and code samples are indented four
# spaces so that they render as code blocks rather than running together.
LONGDESC = '''pybibframe

Requires Python 3.5+. To install:

    python setup.py install

# Usage

## Converting MARC/XML to RDF or Versa output (command line)

Note: Versa is a model for Web resources and relationships. Think of it
as an evolution of Resource Description Framework (RDF) that's at once
simpler and more expressive. It's the default internal representation
for pybibframe, though regular RDF is an optional output.

    marc2bf records.mrx

Reads MARC/XML from the file records.mrx and outputs a Versa
representation of the resulting BIBFRAME records in JSON format. You can
send that output to a file as well:

    marc2bf -o resources.versa.json records.mrx

The Versa representation is the primary format for ongoing, pipeline
processing.

If you want an RDF/Turtle representation of this file you can do:

    marc2bf -o resources.versa.json --rdfttl resources.ttl records.mrx

If you want an RDF/XML representation of this file you can do:

    marc2bf -o resources.versa.json --rdfxml resources.rdf records.mrx

These options do build the full RDF model in memory, so they can slow
things down quite a bit.

You can get the source MARC/XML from standard input:

    curl http://lccn.loc.gov/2006013175/marcxml | marc2bf -c /Users/uche/dev/zepheira/pybibframe-plus/test/resource/config1.json --mod=bibframe.zextra -o /tmp/marc2bf.versa.json

In this case a record is pulled from the Web, in particular Library of
Congress Online Catalog / LCCN Permalink. Another example, Das Innere
des Glaspalastes in London:

    curl http://lccn.loc.gov/2012659481/marcxml | marc2bf -c /Users/uche/dev/zepheira/pybibframe-plus/test/resource/config1.json --mod=bibframe.zextra -o /tmp/marc2bf.versa.json

You can process more than one MARC/XML file at a time by listing them on
the command line:

    marc2bf records1.mrx records2.mrx records3.mrx

Or by using wildcards:

    marc2bf records?.mrx

PyBibframe is highly configurable and extensible. You can specify
plug-ins from the command line. You need to specify the Python module
from which the plugins can be imported and a configuration file
specifying how the plugins are to be used. For example, to use the
``linkreport`` plugin that comes with PyBibframe you can do:

    marc2bf -c config1.json --mod=bibframe.plugin records.mrx

Where the contents of config1.json might be:

    {
        "plugins": [
            {"id": "http://bibfra.me/tool/pybibframe#labelizer",
             "lookup": {
                 "http://bibfra.me/vocab/lite/Work": "http://bibfra.me/vocab/lite/title",
                 "http://bibfra.me/vocab/lite/Instance": "http://bibfra.me/vocab/lite/title"
             }
            }
        ]
    }

Which in this case will add RDFS label statements for Works and
Instances to the output.

# Converting MARC/XML to RDF or Versa output (API)

The ``bibframe.reader.bfconvert`` function can be used as an API to run
the conversion.

    >>> from bibframe.reader import bfconvert
    >>> inputs = open('records.mrx', 'r')
    >>> out = open('resources.versa.json', 'w')
    >>> bfconvert(inputs=inputs, entbase='http://example.org', out=out)

# Configuration

* ``marcspecials-vocab``—List of vocabulary (base) IRIs to qualify
  relationships and resource types generated from processing the
  special MARC fields 006, 007, 008 and the leader.

## Transforms

    'transforms': { 'bib': 'http://example.org/vocab/marc-bib-transforms', }

# See also

Some open-source tools for working with BIBFRAME (see
http://bibframe.org)

Note: very useful to have around yaz-marcdump (which e.g. you can use to
convert other MARC formats to MARC/XML)

Download from http://ftp.indexdata.com/pub/yaz/, unpack then do:

    $ ./configure --prefix=$HOME/.local
    $ make && make install

If you're on a Debian-based Linux you might find useful [these
installation notes](https://gist.github.com/uogbuji/7cbc5c62f99951999574).

[MarcEdit](http://marcedit.reeset.net/) can also convert to MARC/XML.
Just install, select "MARC Tools" from the menu, choose your input file,
specify an output file, and specify the conversion you need to perform,
e.g. "MARC21->MARC21XML" for MARC to MARC/XML. Note the availability of
the UTF-8 output option too.
'''
# Content type of LONGDESC, passed as long_description_content_type.
LONGDESC_CTYPE = 'text/markdown'
# Collect every setup() keyword in one mapping, then make a single call.
_setup_kwargs = {
    'name': PROJECT_NAME,
    'version': __version__,
    'description': PROJECT_DESCRIPTION,
    'license': PROJECT_LICENSE,
    'author': PROJECT_AUTHOR,
    'author_email': PROJECT_AUTHOR_EMAIL,
    'maintainer': PROJECT_MAINTAINER,
    'maintainer_email': PROJECT_MAINTAINER_EMAIL,
    'url': PROJECT_URL,
    'package_dir': PACKAGE_DIR,
    'packages': PACKAGES,
    'scripts': SCRIPTS,
    'install_requires': CORE_REQUIREMENTS,
    'classifiers': CLASSIFIERS,
    'long_description': LONGDESC,
    'long_description_content_type': LONGDESC_CTYPE,
}
setup(**_setup_kwargs)