Skip to content

Commit

Permalink
neurondm update composer.py example, nlp include literatureCitation
Browse files Browse the repository at this point in the history
now more closely aligned with the current composer data model,
includes references if they are present on individual neurons

the example code is a bit more convoluted, but it works from local
and remote sources as needed now

also better error reporting on request failure in OntMetaIri so that
we can immediately see what uri failed to fetch
  • Loading branch information
tgbugs committed Jun 1, 2023
1 parent 33a7912 commit 0a96bed
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 52 deletions.
175 changes: 126 additions & 49 deletions neurondm/docs/composer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pyontutils.core import OntGraph, OntResIri
import os
from pyontutils.core import OntGraph, OntResIri, OntResPath
from pyontutils.namespaces import rdfs, ilxtr
from neurondm.core import Config, graphBase
from neurondm.core import Config, graphBase, log
from neurondm.core import OntTerm, OntId, RDFL


Expand All @@ -12,36 +13,84 @@ def multi_orig_dest(neuron):
return True


def lg(neuron, predicate):
# TODO could add expected cardinality here if needed
return list(neuron.getObjects(predicate))


def for_composer(n):
return dict(
id = n.id_,
label = n.origLabel,
origin = lg(n, ilxtr.hasSomaLocatedIn),
dest_presyn = lg(n, ilxtr.hasAxonPresynapticElementIn),
dest_sens = lg(n, ilxtr.hasAxonSensorySubcellularElementIn),
dest_dend = lg(n, ilxtr.hasDendriteLocatedIn),
path = lg(n, ilxtr.hasAxonLocatedIn), # TODO pull ordering from partial orders (not implemented in core atm)
#laterality = lg(n, ilxtr.hasLaterality), # left/rigth tricky ?
#projection_laterality = lg(n, ilxtr.???), # axon located in contra ?
species = lg(n, ilxtr.hasInstanceInTaxon),
sex = lg(n, ilxtr.hasBiologicalSex),
circuit_type = lg(n, ilxtr.hasCircuitRolePhenotype),
def makelpesrdf():
    """Build the (lpes, lrdf, collect) helper triple for one neuron conversion.

    Returns:
        lpes: extract objects for a predicate from the neuron's phenotype bags,
              recording every (predicate, object) pair seen into ``collect``.
        lrdf: extract objects for a predicate directly from the backing graph.
        collect: the shared list of (predicate, object) pairs, used afterward
              to detect phenotypes that were not accounted for.
    """
    collect = []

    def lpes(neuron, predicate):
        """ get predicates from python bags """
        # TODO could add expected cardinality here if needed
        # explicit loop instead of hiding the collect.append side effect
        # inside a comprehension filter (append returns None -> always kept)
        out = []
        for o in neuron.getObjects(predicate):
            collect.append((predicate, o))
            out.append(str(o))
        return out

    def lrdf(neuron, predicate):
        """ get predicates from graph """
        return [  # XXX FIXME core_graph bad etc.
            str(o) for o in
            neuron.core_graph[neuron.identifier:predicate]]

    return lpes, lrdf, collect


def for_composer(n, cull=False):
    """Convert a neurondm neuron into a plain dict for the composer data model.

    NOTE(review): this span in the scraped diff interleaved the deleted
    lg-based lines with the new lpes-based ones (duplicate ``phenotype``
    keyword); this is the coherent post-commit version.

    Args:
        n: a neurondm neuron (provides id_, origLabel, getObjects, pes, etc.)
        cull: when True, drop keys whose values are empty/falsy so manual
              review only sees populated fields.

    Returns:
        dict keyed by composer field names; location values are lists of
        curie/iri strings, dest/path entries are {loc, type} dicts.
    """
    lpes, lrdf, collect = makelpesrdf()
    fc = dict(
        id = str(n.id_),
        label = str(n.origLabel),
        origin = lpes(n, ilxtr.hasSomaLocatedIn),
        dest = (
            # XXX looking at this there seems to be a faulty assumption that
            # there is only a single destination type per statement, this is
            # not the case, there is destination type per destination
            [dict(loc=l, type='AXON-T') for l in lpes(n, ilxtr.hasAxonPresynapticElementIn)] +
            # XXX I strongly recommend renaming this to SENSORY-T so that the
            # short forms are harder to confuse A-T and S-T
            [dict(loc=l, type='AFFERENT-T') for l in lpes(n, ilxtr.hasAxonSensorySubcellularElementIn)]
        ),
        path = (  # TODO pull ordering from partial orders (not implemented in core atm)
            [dict(loc=l, type='AXON') for l in lpes(n, ilxtr.hasAxonLocatedIn)] +
            # XXX dendrites don't really ... via ... they are all both terminal and via at the same time ...
            [dict(loc=l, type='DENDRITE') for l in lpes(n, ilxtr.hasDendriteLocatedIn)]
        ),
        #laterality = lpes(n, ilxtr.hasLaterality),  # left/right tricky ?
        #projection_laterality = lpes(n, ilxtr.???),  # axon located in contra ?
        species = lpes(n, ilxtr.hasInstanceInTaxon),
        sex = lpes(n, ilxtr.hasBiologicalSex),
        circuit_type = lpes(n, ilxtr.hasCircuitRolePhenotype),
        phenotype = lpes(n, ilxtr.hasAnatomicalSystemPhenotype),  # current meaning of composer phenotype
        anatomical_system = lpes(n, ilxtr.hasAnatomicalSystemPhenotype),
        # there are a number of dimensions that we aren't converting right now
        dont_know_fcrp = lpes(n, ilxtr.hasFunctionalCircuitRolePhenotype),
        other_phenotype = ( lpes(n, ilxtr.hasPhenotype)
                          + lpes(n, ilxtr.hasMolecularPhenotype)
                          + lpes(n, ilxtr.hasProjectionPhenotype)),
        forward_connection = lpes(n, ilxtr.hasForwardConnectionPhenotype),

        # direct references from individual individual neurons
        provenance = lrdf(n, ilxtr.literatureCitation),
        sentence_number = lrdf(n, ilxtr.sentenceNumber),
        note_alert = lrdf(n, ilxtr.alertNote),
        # XXX provenance from ApiNATOMY models as a whole is not ingested
        # right now because composer lacks support for 1:n from neuron to
        # prov, (or rather lacks prov collections) and because it attaches
        # prov to the sentence, which does not exist for all neurons

        # TODO more ...
        # notes = ?

        # for _ignore, hasClassificationPhenotype is used for ApiNATOMY
        # unlikely to be encountered for real neurons any time soon
        _ignore = lpes(n, ilxtr.hasClassificationPhenotype),  # used to ensure we account for all phenotypes
    )
    # warn about phenotypes present on the neuron that the mapping above
    # did not touch (collect accumulated everything lpes saw)
    npo = set((p.e, p.p) for p in n.pes)
    cpo = set(collect)
    unaccounted_pos = npo - cpo
    if unaccounted_pos:
        log.warning(
            (n.id_, [[n.in_graph.namespace_manager.qname(e) for e in pos]
                     for pos in unaccounted_pos]))
    return {k:v for k, v in fc.items() if v} if cull else fc


def location_summary(neurons, services):
def location_summary(neurons, services, anatent_simple=False):
import csv
OntTerm.query._services = services
locations = sorted(set(
Expand All @@ -52,62 +101,90 @@ def key(t):
return (t.prefix, t.label[0].lower()
if isinstance(t, tuple)
else t.lower())
header = 'label', 'curie', 'iri'
rows = (
[header] +
[(_.label, _.curie, _.iri) for _ in sorted(locations, key=key)])
with open('/tmp/npo-nlp-apinat-location-summary.csv', 'wt') as f:
csv.writer(f).writerows(rows)

if anatent_simple:
header = 'label', 'curie', 'iri'
rows = (
[header] +
[(_.label, _.curie, _.iri) for _ in sorted(locations, key=key)])
with open('/tmp/npo-nlp-apinat-location-summary.csv', 'wt') as f:
csv.writer(f, lineterminator='\n').writerows(rows)

else:
header = 'o', 'o_label', 'o_synonym'
rows = (
[header] +
[(_.iri, _.label, syn) for _ in sorted(locations, key=key)
for syn in _.synonyms])
with open('/tmp/anatomical_entities.csv', 'wt') as f:
csv.writer(f, lineterminator='\n').writerows(rows)


def main(local=False, anatomical_entities=False, anatent_simple=False):
    """Load NPO/ApiNATOMY neuron populations and convert them for composer.

    NOTE(review): the scraped diff for this span mixed deleted pre-commit
    lines (old ``def main():``, OntResIri-only loading, ``breakpoint()``)
    with the added ones; this is the coherent post-commit version.

    Args:
        local: read the ttl sources from the local ontology repo instead of
               fetching them from raw.githubusercontent.com.
        anatomical_entities: when True, also write the location summary csv.
        anatent_simple: passed through to location_summary to pick the
               simple (label, curie, iri) csv layout.
    """
    # if (local := True, anatomical_entities := True, anatent_simple := False):

    config = Config('random-merge')
    g = OntGraph()  # load and query graph

    # remove scigraph and interlex calls
    graphBase._sgv = None
    del graphBase._sgv
    if len(OntTerm.query._services) > 1:
        # backup services and avoid issues on rerun
        _old_query_services = OntTerm.query._services
        _noloc_query_services = _old_query_services[1:]

    OntTerm.query._services = (RDFL(g, OntId),)

    # base paths to ontology files
    gen_neurons_path = 'ttl/generated/neurons/'
    suffix = '.ttl'
    if local:
        from pyontutils.config import auth
        olr = auth.get_path('ontology-local-repo')
        local_base = olr / gen_neurons_path
    else:
        orr = 'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/neurons/'
        remote_base = orr + gen_neurons_path

    # full imports
    for f in ('apinat-partial-orders',
              'apinat-pops-more',
              'apinat-simple-sheet',
              'sparc-nlp'):
        if local:
            ori = OntResPath(local_base / (f + suffix))
        else:
            ori = OntResIri(remote_base + f + suffix)
        [g.add(t) for t in ori.graph]

    # label only imports
    for f in ('apinatomy-neuron-populations',
              '../../npo'):
        # normpath collapses the ../.. so npo.ttl resolves at the repo root
        p = os.path.normpath(gen_neurons_path + f)
        if local:
            ori = OntResPath(olr / (p + suffix))
        else:
            ori = OntResIri(orr + p + suffix)

        [g.add((s, rdfs.label, o)) for s, o in ori.graph[:rdfs.label:]]

    config.load_existing(g)
    # FIXME currently subClassOf axioms are not parsed back so we are e.g.
    # missing hasInstanceInTaxon axioms for apinatomy neurons
    neurons = config.neurons()  # scigraph required here if deps not removed above

    # ingest to composer starts here
    mvp_ingest = [n for n in neurons if not multi_orig_dest(n)]

    dims = set(p for n in neurons for p in n.edges)  # for reference
    fcs = [for_composer(n) for n in mvp_ingest]
    _fcne = [for_composer(n, cull=True) for n in mvp_ingest]  # exclude empties for easier manual review

    # example neuron
    n = mvp_ingest[0]
    fc = for_composer(n)

    if anatomical_entities:
        location_summary(neurons, _noloc_query_services, anatent_simple)


if __name__ == '__main__':
Expand Down
6 changes: 3 additions & 3 deletions neurondm/neurondm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2465,14 +2465,14 @@ def _load_existing(cls, iris):
if not cls._loading:
NeuronBase._loading = True # block all other neuron loading
try:
log.debug(str([i for i in iris if '4164' in i or '100212' in i]))
#log.debug(str([i for i in iris if '4164' in i or '100212' in i]))
for iri in iris:
# rod/cone issue
#breakpoint()
try:
n = cls(id_=iri, override=True)#, out_graph=cls.config.load_graph) # I think we can get away without this
if iri.endswith('4164') or iri.endswith('100212'):
log.debug(f'{iri} -> {n}')
#if iri.endswith('4164') or iri.endswith('100212'):
#log.debug(f'{iri} -> {n}')

# because we just call Config again an everything resets
except cls.owlClassMismatch as e:
Expand Down
2 changes: 2 additions & 0 deletions neurondm/neurondm/models/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def asdf(s, p, rm):
asdf(s, ilxtr.curatorNote, r.curation_notes)
asdf(s, ilxtr.reviewNote, r.review_notes)
asdf(s, ilxtr.reference, r.reference_pubmed_id__doi_or_text)
asdf(s, ilxtr.literatureCitation, r.literature_citation)
asdf(s, rdfs.label, r.neuron_population_label_a_to_b_via_c)
if hasattr(r, 'alert_explanation'):
asdf(s, ilxtr.alertNote, r.alert_explanation)
Expand All @@ -126,6 +127,7 @@ def asdf(s, p, rm):
p = map_predicates(r.relationship().value)
o = OntId(r.explicit_complement().value)
ec[(s, p)] = o

if hasattr(r, 'axonal_course_poset') and r.axonal_course_poset().value:
# s.u and OntId(...).u to avoid duplicate subjects/objects in the graph
# due to type vs instance issues for rdflib.URIRef and OntId
Expand Down
3 changes: 3 additions & 0 deletions pyontutils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,9 @@ def _data_from_generator(self,
conventions_type,
yield_response_gen,
):
if not resp.ok:
resp.raise_for_status()

first = next(gen)
# TODO better type detection

Expand Down

0 comments on commit 0a96bed

Please sign in to comment.