diff --git a/neurondm/docs/composer.py b/neurondm/docs/composer.py
index ff5b1cb3..1bfd03aa 100644
--- a/neurondm/docs/composer.py
+++ b/neurondm/docs/composer.py
@@ -1,6 +1,7 @@
-from pyontutils.core import OntGraph, OntResIri
+import os
+from pyontutils.core import OntGraph, OntResIri, OntResPath
 from pyontutils.namespaces import rdfs, ilxtr
-from neurondm.core import Config, graphBase
+from neurondm.core import Config, graphBase, log
 from neurondm.core import OntTerm, OntId, RDFL
 
 
@@ -12,36 +13,84 @@ def multi_orig_dest(neuron):
             return True
 
 
-def lg(neuron, predicate):
-    # TODO could add expected cardinality here if needed
-    return list(neuron.getObjects(predicate))
-
-
-def for_composer(n):
-    return dict(
-        id = n.id_,
-        label = n.origLabel,
-        origin = lg(n, ilxtr.hasSomaLocatedIn),
-        dest_presyn = lg(n, ilxtr.hasAxonPresynapticElementIn),
-        dest_sens = lg(n, ilxtr.hasAxonSensorySubcellularElementIn),
-        dest_dend = lg(n, ilxtr.hasDendriteLocatedIn),
-        path = lg(n, ilxtr.hasAxonLocatedIn),  # TODO pull ordering from partial orders (not implemented in core atm)
-        #laterality = lg(n, ilxtr.hasLaterality),  # left/rigth tricky ?
-        #projection_laterality = lg(n, ilxtr.???),  # axon located in contra ?
-        species = lg(n, ilxtr.hasInstanceInTaxon),
-        sex = lg(n, ilxtr.hasBiologicalSex),
-        circuit_type = lg(n, ilxtr.hasCircuitRolePhenotype),
+def makelpesrdf():
+    collect = []
+    def lpes(neuron, predicate):
+        """ get predicates from python bags """
+        # TODO could add expected cardinality here if needed
+        return [str(o) for o in neuron.getObjects(predicate)
+                if not collect.append((predicate, o))]
+
+    def lrdf(neuron, predicate):
+        """ get predicates from graph """
+        return [  # XXX FIXME core_graph bad etc.
+            str(o) for o in
+            neuron.core_graph[neuron.identifier:predicate]]
+
+    return lpes, lrdf, collect
+
+
+def for_composer(n, cull=False):
+    lpes, lrdf, collect = makelpesrdf()
+    fc = dict(
+        id = str(n.id_),
+        label = str(n.origLabel),
+        origin = lpes(n, ilxtr.hasSomaLocatedIn),
+        dest = (
+            # XXX looking at this there seems to be a faulty assumption that
+            # there is only a single destination type per statement, this is
+            # not the case, there is a destination type per destination
+            [dict(loc=l, type='AXON-T') for l in lpes(n, ilxtr.hasAxonPresynapticElementIn)]
+            # XXX I strongly recommend renaming this to SENSORY-T so that the
+            # short forms are harder to confuse A-T and S-T
+            + [dict(loc=l, type='AFFERENT-T') for l in lpes(n, ilxtr.hasAxonSensorySubcellularElementIn)]
+        ),
+        path = (  # TODO pull ordering from partial orders (not implemented in core atm)
+            [dict(loc=l, type='AXON') for l in lpes(n, ilxtr.hasAxonLocatedIn)]
+            # XXX dendrites don't really ... via ... they are all both terminal and via at the same time ...
+            + [dict(loc=l, type='DENDRITE') for l in lpes(n, ilxtr.hasDendriteLocatedIn)]
+        ),
+        #laterality = lpes(n, ilxtr.hasLaterality),  # left/right tricky ?
+        #projection_laterality = lpes(n, ilxtr.???),  # axon located in contra ?
+        species = lpes(n, ilxtr.hasInstanceInTaxon),
+        sex = lpes(n, ilxtr.hasBiologicalSex),
+        circuit_type = lpes(n, ilxtr.hasCircuitRolePhenotype),
+        phenotype = lpes(n, ilxtr.hasAnatomicalSystemPhenotype),  # current meaning of composer phenotype
+        anatomical_system = lpes(n, ilxtr.hasAnatomicalSystemPhenotype),
         # there are a number of dimensions that we aren't converting right now
-        dont_know_fcrp = lg(n, ilxtr.hasFunctionalCircuitRolePhenotype),
-        phenotype = (lg(n, ilxtr.hasPhenotype)  # FIXME currently a grab bag of other types here
-                     + lg(n, ilxtr.hasMolecularPhenotype)
-                     + lg(n, ilxtr.hasProjectionPhenotype)),
-        forward_connection = lg(n, ilxtr.hasForwardConnectionPhenotype),
+        dont_know_fcrp = lpes(n, ilxtr.hasFunctionalCircuitRolePhenotype),
+        other_phenotype = (lpes(n, ilxtr.hasPhenotype)
+                           + lpes(n, ilxtr.hasMolecularPhenotype)
+                           + lpes(n, ilxtr.hasProjectionPhenotype)),
+        forward_connection = lpes(n, ilxtr.hasForwardConnectionPhenotype),
+
+        # direct references from individual neurons
+        provenance = lrdf(n, ilxtr.literatureCitation),
+        sentence_number = lrdf(n, ilxtr.sentenceNumber),
+        note_alert = lrdf(n, ilxtr.alertNote),
+        # XXX provenance from ApiNATOMY models as a whole is not ingested
+        # right now because composer lacks support for 1:n from neuron to
+        # prov, (or rather lacks prov collections) and because it attaches
+        # prov to the sentence, which does not exist for all neurons
+        # TODO more ...
+        # notes = ?
+
+        # for _ignore, hasClassificationPhenotype is used for ApiNATOMY
+        # unlikely to be encountered for real neurons any time soon
+        _ignore = lpes(n, ilxtr.hasClassificationPhenotype),  # used to ensure we account for all phenotypes
     )
+    npo = set((p.e, p.p) for p in n.pes)
+    cpo = set(collect)
+    unaccounted_pos = npo - cpo
+    if unaccounted_pos:
+        log.warning(
+            (n.id_, [[n.in_graph.namespace_manager.qname(e) for e in pos]
+                     for pos in unaccounted_pos]))
+    return {k: v for k, v in fc.items() if v} if cull else fc
 
 
-def location_summary(neurons, services):
+def location_summary(neurons, services, anatent_simple=False):
     import csv
     OntTerm.query._services = services
     locations = sorted(set(
@@ -52,46 +101,75 @@ def key(t):
         return (t.prefix, t.label[0].lower() if isinstance(t, tuple) else t.lower())
 
-    header = 'label', 'curie', 'iri'
-    rows = (
-        [header] +
-        [(_.label, _.curie, _.iri) for _ in sorted(locations, key=key)])
-    with open('/tmp/npo-nlp-apinat-location-summary.csv', 'wt') as f:
-        csv.writer(f).writerows(rows)
+
+    if anatent_simple:
+        header = 'label', 'curie', 'iri'
+        rows = (
+            [header] +
+            [(_.label, _.curie, _.iri) for _ in sorted(locations, key=key)])
+        with open('/tmp/npo-nlp-apinat-location-summary.csv', 'wt') as f:
+            csv.writer(f, lineterminator='\n').writerows(rows)
+
+    else:
+        header = 'o', 'o_label', 'o_synonym'
+        rows = (
+            [header] +
+            [(_.iri, _.label, syn) for _ in sorted(locations, key=key)
+             for syn in _.synonyms])
+        with open('/tmp/anatomical_entities.csv', 'wt') as f:
+            csv.writer(f, lineterminator='\n').writerows(rows)
 
 
-def main():
+def main(local=False, anatomical_entities=False, anatent_simple=False):
+    # if (local := True, anatomical_entities := True, anatent_simple := False):
+
     config = Config('random-merge')
     g = OntGraph()  # load and query graph
 
     # remove scigraph and interlex calls
     graphBase._sgv = None
     del graphBase._sgv
-    _old_query_services = OntTerm.query._services
+    if len(OntTerm.query._services) > 1:
+        # backup services and avoid issues on rerun
+        _old_query_services = OntTerm.query._services
+        _noloc_query_services = _old_query_services[1:]
+
     OntTerm.query._services = (RDFL(g, OntId),)
 
-    b = ('https://raw.githubusercontent.com/SciCrunch/'
-         'NIF-Ontology/neurons/ttl/generated/neurons/')
+    # base paths to ontology files
+    gen_neurons_path = 'ttl/generated/neurons/'
+    suffix = '.ttl'
+    if local:
+        from pyontutils.config import auth
+        olr = auth.get_path('ontology-local-repo')
+        local_base = olr / gen_neurons_path
+    else:
+        orr = 'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/neurons/'
+        remote_base = orr + gen_neurons_path
 
     # full imports
     for f in ('apinat-partial-orders',
              'apinat-pops-more',
             'apinat-simple-sheet',
             'sparc-nlp'):
-        ori = OntResIri(b + f + '.ttl')
+        if local:
+            ori = OntResPath(local_base / (f + suffix))
+        else:
+            ori = OntResIri(remote_base + f + suffix)
 
         [g.add(t) for t in ori.graph]
 
     # label only imports
-    for f in (
-            b + 'apinatomy-neuron-populations' + '.ttl',
-            ('https://raw.githubusercontent.com/SciCrunch/'
-             'NIF-Ontology/neurons/ttl/npo.ttl')):
-        ori = OntResIri(f)
+    for f in ('apinatomy-neuron-populations',
+              '../../npo'):
+        p = os.path.normpath(gen_neurons_path + f)
+        if local:
+            ori = OntResPath(olr / (p + suffix))
+        else:
+            ori = OntResIri(orr + p + suffix)
+
         [g.add((s, rdfs.label, o)) for s, o in ori.graph[:rdfs.label:]]
 
     config.load_existing(g)
-    # FIXME currently subClassOf axioms are not parsed back so we are e.g.
-    # missing hasInstanceInTaxon axioms for apinatomy neurons
     neurons = config.neurons()  # scigraph required here if deps not removed above
 
     # ingest to composer starts here
@@ -99,15 +177,14 @@ def main():
     dims = set(p for n in neurons for p in n.edges)  # for reference
     fcs = [for_composer(n) for n in mvp_ingest]
+    _fcne = [for_composer(n, cull=True) for n in mvp_ingest]  # exclude empties for easier manual review
 
     # example neuron
     n = mvp_ingest[0]
     fc = for_composer(n)
 
-    if False:
-        location_summary(neurons, _old_query_services)
-
-    breakpoint()
+    if anatomical_entities:
+        location_summary(neurons, _noloc_query_services, anatent_simple)
 
 
 if __name__ == '__main__':
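Note, not part of the diff: in the new lpes helper above, the filter clause "if not collect.append((predicate, o))" works because list.append returns None, so the condition is always true and every serialized (predicate, object) pair is recorded as a side effect; collect is later compared against n.pes to warn about unaccounted phenotypes. Below is a minimal standalone sketch of that idiom and of the cull behaviour in the new return statement; the predicate names and identifiers are placeholders, and this simplified lpes takes a list of pairs rather than a neuron object.

collect = []

def lpes(pairs, predicate):
    # list.append returns None, so "not ..." is always True: every matching
    # object passes through the filter while being recorded in collect
    return [str(o) for p, o in pairs if p == predicate
            if not collect.append((p, o))]

pairs = [('hasSomaLocatedIn', 'UBERON:0000000'),   # placeholder identifiers
         ('hasBiologicalSex', 'PATO:0000000')]
origin = lpes(pairs, 'hasSomaLocatedIn')
assert origin == ['UBERON:0000000']
assert collect == [('hasSomaLocatedIn', 'UBERON:0000000')]

# cull=True in for_composer drops keys whose value is empty
fc = {'id': 'example:pop-1', 'origin': origin, 'sex': []}
culled = {k: v for k, v in fc.items() if v}
assert culled == {'id': 'example:pop-1', 'origin': origin}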
diff --git a/neurondm/neurondm/core.py b/neurondm/neurondm/core.py
index d8965df3..27d68885 100644
--- a/neurondm/neurondm/core.py
+++ b/neurondm/neurondm/core.py
@@ -2465,14 +2465,14 @@ def _load_existing(cls, iris):
         if not cls._loading:
             NeuronBase._loading = True  # block all other neuron loading
             try:
-                log.debug(str([i for i in iris if '4164' in i or '100212' in i]))
+                #log.debug(str([i for i in iris if '4164' in i or '100212' in i]))
                 for iri in iris:
                     # rod/cone issue
                     #breakpoint()
                     try:
                         n = cls(id_=iri, override=True)#, out_graph=cls.config.load_graph)  # I think we can get away without this
-                        if iri.endswith('4164') or iri.endswith('100212'):
-                            log.debug(f'{iri} -> {n}')
+                        #if iri.endswith('4164') or iri.endswith('100212'):
+                            #log.debug(f'{iri} -> {n}')
                         # because we just call Config again an everything resets
                     except cls.owlClassMismatch as e:
diff --git a/neurondm/neurondm/models/nlp.py b/neurondm/neurondm/models/nlp.py
index ded711d5..46c27ad7 100644
--- a/neurondm/neurondm/models/nlp.py
+++ b/neurondm/neurondm/models/nlp.py
@@ -118,6 +118,7 @@ def asdf(s, p, rm):
         asdf(s, ilxtr.curatorNote, r.curation_notes)
         asdf(s, ilxtr.reviewNote, r.review_notes)
         asdf(s, ilxtr.reference, r.reference_pubmed_id__doi_or_text)
+        asdf(s, ilxtr.literatureCitation, r.literature_citation)
         asdf(s, rdfs.label, r.neuron_population_label_a_to_b_via_c)
         if hasattr(r, 'alert_explanation'):
             asdf(s, ilxtr.alertNote, r.alert_explanation)
@@ -126,6 +127,7 @@ def asdf(s, p, rm):
             p = map_predicates(r.relationship().value)
             o = OntId(r.explicit_complement().value)
             ec[(s, p)] = o
+
         if hasattr(r, 'axonal_course_poset') and r.axonal_course_poset().value:
             # s.u and OntId(...).u to avoid duplicate subjects/objects in the graph
             # due to type vs instance issues for rdflib.URIRef and OntId
diff --git a/pyontutils/core.py b/pyontutils/core.py
index cc839ff1..3b30fe72 100644
--- a/pyontutils/core.py
+++ b/pyontutils/core.py
@@ -649,6 +649,9 @@ def _data_from_generator(self,
                              conventions_type,
                              yield_response_gen,
                              ):
+        if not resp.ok:
+            resp.raise_for_status()
+
         first = next(gen)  # TODO better type detection
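Note, not part of the diff: the new guard in _data_from_generator surfaces HTTP failures before the response body reaches the parser. A minimal sketch of the same pattern with the requests library, assuming, as elsewhere in pyontutils, that resp is a requests-style response; the URL is a placeholder.

import requests

resp = requests.get('https://example.org/some-ontology-file.ttl')  # placeholder URL
if not resp.ok:
    # ok is False for 4xx/5xx status codes; raise_for_status then raises
    # requests.HTTPError instead of letting a bad body flow downstream
    resp.raise_for_status()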