-
Notifications
You must be signed in to change notification settings - Fork 193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CSW harvester OutputSchema config support #258 #259
base: master
Are you sure you want to change the base?
Changes from 6 commits
e2349d9
33d9b70
855ab7d
bf2d1d6
fbd5d1c
9d36378
4fb17cc
f91f516
b82eb89
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,14 +70,36 @@ class CswService(OwsService): | |
def __init__(self, endpoint=None):
    """Set up the service and keep the server capabilities parsed.

    Besides the default sort order (by ``dc:identifier``), the response of
    ``self.getcapabilities(endpoint)`` is parsed and stored as an element
    tree in ``self.capabilities`` so later calls can inspect what the
    server advertises.

    :param endpoint: passed to the parent initializer and to
        ``getcapabilities``
    """
    super(CswService, self).__init__(endpoint)
    self.sortby = SortBy([SortProperty('dc:identifier')])
    # check capabilities
    capabilities_xml = self.getcapabilities(endpoint)['response']
    capabilities_root = etree.fromstring(capabilities_xml)
    self.capabilities = etree.ElementTree(capabilities_root)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please try to follow PEP8 guidelines, especially spacing around operators such as `=`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I can't validate the whole project and my code editor is not helping me. Good catch, I'll try to fix my mistake. |
||
|
||
def _get_output_schemas(self, operation):
    """Return the output schemas the server advertises for ``operation``.

    Looks up the ``Operation`` element named ``operation`` in the parsed
    capabilities document (``self.capabilities``), collects the ``Value``
    entries of its ``outputSchema`` parameter, and maps every namespace
    prefix in scope on that parameter element to its URI when the URI is
    one of the advertised values.

    :param operation: name of the CSW operation, e.g. ``'GetRecords'``
    :returns: dict of ``{namespace prefix: schema URI}``
    :raises CswError: if the capabilities document declares no ``ows``
        namespace, or does not describe the operation or its
        ``outputSchema`` parameter
    """
    _cap_ns = self.capabilities.getroot().nsmap
    _ows_ns = _cap_ns.get('ows')
    if not _ows_ns:
        raise CswError('Bad getcapabilities response: OWS namespace not found '+str(_cap_ns))

    _op = self.capabilities.find(
        "//{{{}}}Operation[@name='{}']".format(_ows_ns, operation))
    # Compare with None explicitly: an element without children is falsy.
    if _op is None:
        raise CswError(
            'Bad getcapabilities response: operation \'{}\' not found'.format(operation))

    _schemas = _op.find("{{{}}}Parameter[@name='outputSchema']".format(_ows_ns))
    if _schemas is None:
        raise CswError(
            'Bad getcapabilities response: outputSchema parameter not found '
            'for operation \'{}\''.format(operation))

    # Build a set once: a lazy map() object would be consumed by the first
    # membership tests and silently drop the remaining schemas.
    _values = {
        v.text.strip()
        for v in _schemas.findall("{{{}}}Value".format(_ows_ns))
        if v.text
    }
    return {
        key: value
        for key, value in _schemas.nsmap.items()
        if value in _values
    }
|
||
def getrecords(self, qtype=None, keywords=[], | ||
typenames="csw:Record", esn="brief", | ||
skip=0, count=10, outputschema="gmd", **kw): | ||
from owslib.csw import namespaces | ||
|
||
constraints = [] | ||
csw = self._ows(**kw) | ||
|
||
# fetch target csw server capabilities for requested output schema | ||
output_schemas=self._get_output_schemas('GetRecords') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we move this call to `__init__`? For example: def __init__(self, endpoint=None):
_cap = self.getcapabilities(endpoint)['response']
self.capabilities = etree.ElementTree(etree.fromstring(_cap))
self.output_schemas = {
'GetRecords': self._get_output_schemas('GetRecords'),
'GetRecordById': self._get_output_schemas('GetRecordById'),
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
if not output_schemas.get(outputschema): | ||
raise CswError('Output schema \'{}\' not supported by target server: '.format(output_schemas)) | ||
|
||
if qtype is not None: | ||
constraints.append(PropertyIsEqualTo("dc:type", qtype)) | ||
|
||
|
@@ -87,7 +109,7 @@ def getrecords(self, qtype=None, keywords=[], | |
"esn": esn, | ||
"startposition": skip, | ||
"maxrecords": count, | ||
"outputschema": namespaces[outputschema], | ||
"outputschema": output_schemas[outputschema], | ||
"sortby": self.sortby | ||
} | ||
log.info('Making CSW request: getrecords2 %r', kwa) | ||
|
@@ -102,10 +124,15 @@ def getrecords(self, qtype=None, keywords=[], | |
def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief", | ||
keywords=[], limit=None, page=10, outputschema="gmd", | ||
startposition=0, cql=None, **kw): | ||
from owslib.csw import namespaces | ||
|
||
constraints = [] | ||
csw = self._ows(**kw) | ||
|
||
# fetch target csw server capabilities for requested output schema | ||
output_schemas=self._get_output_schemas('GetRecords') | ||
if not output_schemas.get(outputschema): | ||
raise CswError('Output schema \'{}\' not supported by target server: '.format(output_schemas)) | ||
|
||
if qtype is not None: | ||
constraints.append(PropertyIsEqualTo("dc:type", qtype)) | ||
|
||
|
@@ -115,7 +142,7 @@ def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief", | |
"esn": esn, | ||
"startposition": startposition, | ||
"maxrecords": page, | ||
"outputschema": namespaces[outputschema], | ||
"outputschema": output_schemas[outputschema], | ||
"cql": cql, | ||
"sortby": self.sortby | ||
} | ||
|
@@ -129,7 +156,6 @@ def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief", | |
err = 'Error getting identifiers: %r' % \ | ||
csw.exceptionreport.exceptions | ||
#log.error(err) | ||
raise CswError(err) | ||
|
||
if matches == 0: | ||
matches = csw.results['matches'] | ||
|
@@ -154,11 +180,17 @@ def getidentifiers(self, qtype=None, typenames="csw:Record", esn="brief", | |
kwa["startposition"] = startposition | ||
|
||
def getrecordbyid(self, ids=[], esn="full", outputschema="gmd", **kw): | ||
from owslib.csw import namespaces | ||
|
||
csw = self._ows(**kw) | ||
|
||
# fetch target csw server capabilities for requested output schema | ||
output_schemas=self._get_output_schemas('GetRecordById') | ||
if not output_schemas.get(outputschema): | ||
raise CswError('Output schema \'{}\' not supported by target server: '.format(output_schemas)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I should probably be more tolerant here: log an ERROR and return. |
||
|
||
kwa = { | ||
"esn": esn, | ||
"outputschema": namespaces[outputschema], | ||
"outputschema": output_schemas[outputschema], | ||
} | ||
# Ordinary Python version's don't support the metadata argument | ||
log.info('Making CSW request: getrecordbyid %r %r', ids, kwa) | ||
|
@@ -168,14 +200,17 @@ def getrecordbyid(self, ids=[], esn="full", outputschema="gmd", **kw): | |
csw.exceptionreport.exceptions | ||
#log.error(err) | ||
raise CswError(err) | ||
if not csw.records: | ||
elif csw.records: | ||
record = self._xmd(list(csw.records.values())[0]) | ||
elif csw.response: | ||
record = self._xmd(etree.fromstring(csw.response)) | ||
else: | ||
return | ||
record = self._xmd(list(csw.records.values())[0]) | ||
|
||
## strip off the enclosing results container, we only want the metadata | ||
#md = csw._exml.find("/gmd:MD_Metadata")#, namespaces=namespaces) | ||
# Ordinary Python version's don't support the metadata argument | ||
md = csw._exml.find("/{http://www.isotc211.org/2005/gmd}MD_Metadata") | ||
# '/{schema}*' expression should be safe enough and is able to match the | ||
# desired schema followed by both MD_Metadata or MI_Metadata (iso19115[-2]) | ||
md = csw._exml.find("/{{{schema}}}*".format(schema=output_schemas[outputschema])) | ||
mdtree = etree.ElementTree(md) | ||
try: | ||
record["xml"] = etree.tostring(mdtree, pretty_print=True, encoding=str) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you document the new
output_schema
option and its default value in here so others are aware of it? https://github.com/ckan/ckanext-spatial/blob/master/doc/harvesters.rst
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a fallback to the default in case the server does not support the iso19139 -> 19115 transformation.
The fallback logs the problem and switches back to the default, asking for iso19139 -> iso19139.