Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
dosumis committed Feb 21, 2021
2 parents a358068 + a92f52e commit 45a8498
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 16 deletions.
4 changes: 1 addition & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,5 @@ requests
psycopg2
pandas
pandasql



jsonpath_rw
setuptools~=50.3.2
120 changes: 114 additions & 6 deletions src/vfb_connect/cross_server_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import warnings

from .owl.owlery_query_tools import OWLeryConnect
from .neo.neo4j_tools import Neo4jConnect, QueryWrapper, re
from .neo.neo4j_tools import Neo4jConnect, QueryWrapper, re, dict_cursor
from .default_servers import get_default_servers
import pandas as pd


def gen_short_form(iri):
"""Generate short_form (string) from an iri string
Expand All @@ -9,6 +13,24 @@ def gen_short_form(iri):


class VfbConnect:
"""API wrapper class. By default this wraps connections to the more basal API endpoints (OWL, Neo4j).
Top level methods combine semantic queries that range across VFB content with neo4j queries, returning detailed
metadata about anatomical classes and individuals that fulfill these queries.
Methods allowing direct cypher queries of the production Neo4j are available under `nc`
Methods for querying Neo4j with arbitrary lists of identifiers to return rich metadata or mappings to external IDs
are available under `neo_query_wrapper`.
Direct access OWL queries, returning identifiers only, are available via methods under `oc`
Example semantic queries (OWL class expressions). Note quoting scheme (outer `"` + single quotes for entities).
"'GABAergic neuron'"
"'GABAergic neuron' that 'overlaps' some 'antennal lobe'"
"""
def __init__(self, neo_endpoint=get_default_servers()['neo_endpoint'],
neo_credentials=get_default_servers()['neo_credentials'],
owlery_endpoint=get_default_servers()['owlery_endpoint'],
Expand All @@ -25,6 +47,7 @@ def __init__(self, neo_endpoint=get_default_servers()['neo_endpoint'],

self.oc = OWLeryConnect(endpoint=owlery_endpoint,
lookup=self.nc.get_lookup(limit_by_prefix=lookup_prefixes))
self.vfb_base = "https://v2.virtualflybrain.org/org.geppetto.frontend/geppetto?id="

def get_terms_by_region(self, region, cells_only=False, verbose=False, query_by_label=True):
"""Generate JSON reports for all terms relevant to
Expand All @@ -43,26 +66,111 @@ def get_terms_by_region(self, region, cells_only=False, verbose=False, query_by_
return self.neo_query_wrapper.get_type_TermInfo(list(map(gen_short_form, terms)))

def get_subclasses(self, class_expression, query_by_label=True, direct=False):
    """Generate JSON report of all subclasses of class_expression.

    Args:
        class_expression: Label or OWL class expression. If it contains no
            single quote it is wrapped in single quotes before querying.
        query_by_label: Forwarded to `self.oc.get_subclasses`.
        direct: Accepted for signature parity; not used by this method.

    Returns:
        Whatever `self.neo_query_wrapper.get_type_TermInfo` returns for the
        short_forms of the matching terms.
    """
    if "'" not in class_expression:
        class_expression = "'%s'" % class_expression
    subclass_iris = self.oc.get_subclasses("%s" % class_expression, query_by_label=query_by_label)
    short_forms = [gen_short_form(iri) for iri in subclass_iris]
    return self.neo_query_wrapper.get_type_TermInfo(short_forms)

def get_superclasses(self, class_expression, query_by_label=True, direct=False):
    """Generate JSON report of all superclasses of class_expression.

    Args:
        class_expression: Label or OWL class expression. If it contains no
            single quote it is wrapped in single quotes before querying.
        query_by_label: Forwarded to `self.oc.get_superclasses`.
        direct: Accepted for signature parity; not used by this method.

    Returns:
        Whatever `self.neo_query_wrapper.get_type_TermInfo` returns for the
        short_forms of the matching terms.
    """
    if not re.search("'", class_expression):
        class_expression = "'" + class_expression + "'"
    # Only the superclass query is issued; the earlier subclass lookup was
    # dead work whose result was immediately overwritten.
    terms = self.oc.get_superclasses("%s" % class_expression, query_by_label=query_by_label)
    return self.neo_query_wrapper.get_type_TermInfo(list(map(gen_short_form, terms)))

def get_images(self, class_expression, query_by_label = True, direct = False):
"""Generate JSON report of all images of the submitted type."""
def get_instances(self, class_expression, query_by_label=True, direct=False):
    """Generate JSON report of all instances of class_expression.

    Instances are specific examples of a type/class of structure, e.g. a
    specific instance of the neuron DA1 adPN from the FAFB_catmaid database.
    Instances are typically associated with registered 3D image data and may
    include connectomics data.

    Args:
        class_expression: Label or OWL class expression. If it contains no
            single quote it is wrapped in single quotes before querying.
        query_by_label: Forwarded to `self.oc.get_instances`.
        direct: Forwarded to `self.oc.get_instances`.

    Returns:
        Whatever `self.neo_query_wrapper.get_anatomical_individual_TermInfo`
        returns for the short_forms of the matching individuals.
    """
    if not re.search("'", class_expression):
        class_expression = "'" + class_expression + "'"
    # `direct` used to be silently ignored here; pass it through as
    # get_images_by_type already does.
    terms = self.oc.get_instances("%s" % class_expression,
                                  query_by_label=query_by_label,
                                  direct=direct)
    return self.neo_query_wrapper.get_anatomical_individual_TermInfo(list(map(gen_short_form, terms)))

def _get_neurons_connected_to(self, neuron, weight, direction, classification=None, query_by_label=True):
instances = []
directions = ['upstream', 'downstream']
if not (direction in directions):
raise Exception(ValueError) # Needs improving
if classification:
instances = self.oc.get_instances(classification, query_by_label=query_by_label)
cypher_query = 'MATCH (upstream:Neuron)-[r:synapsed_to]->(downstream:Neuron) ' \
'WHERE r.weight[0] >= %d ' % weight
if query_by_label:
cypher_query += 'AND %s.label = "%s" ' % (direction, neuron)
else:
cypher_query += 'AND %s.short_form = "%s" ' % (direction, neuron)
if classification and instances:
directions.remove(direction)
cypher_query += "AND %s.iri IN %s " % (directions[0], str(instances))
cypher_query += "RETURN upstream.short_form as query_neuron_id, upstream.label as query_neuron_name, " \
"r.weight[0] as weight, " \
"downstream.short_form as target_neuron_id, downstream.label as target_neuron_name"
r = self.nc.commit_list([cypher_query])
dc = dict_cursor(r)
return pd.DataFrame.from_records(dc)

def get_neurons_downstream_of(self, neuron, weight, classification=None, query_by_label=True):
    """Get all neurons downstream of individual `neuron`.

    `neuron` is a short_form if query_by_label=False, otherwise a label.
    Only connections with strength >= `weight` are returned. Optionally
    restrict target neurons to those specified by `classification`, a class
    expression such as "'Kenyon cell'" or
    "'neuron' that overlaps 'lateral horn'".
    """
    # The query neuron sits at the upstream end of each reported connection.
    return self._get_neurons_connected_to(
        neuron=neuron,
        weight=weight,
        direction='upstream',
        classification=classification,
        query_by_label=query_by_label,
    )

def get_neurons_upstream_of(self, neuron, weight, classification=None, query_by_label=True):
    """Get all neurons upstream of individual `neuron`.

    `neuron` is a short_form if query_by_label=False, otherwise a label.
    Only connections with strength >= `weight` are returned. Optionally
    restrict the upstream neurons to those specified by `classification`, a
    class expression such as "'Kenyon cell'" or
    "'neuron' that overlaps 'lateral horn'".
    """
    # The query neuron sits at the downstream end of each reported connection.
    return self._get_neurons_connected_to(neuron=neuron, weight=weight, direction='downstream',
                                          classification=classification, query_by_label=query_by_label)

def get_connected_neurons_by_type(self, upstream_type, downstream_type, weight, query_by_label=True):
    """Get all synaptic connections between individual neurons of two types.

    `upstream_type` and `downstream_type` are each the name of a neuron
    class/type in VFB (label if query_by_label=True, otherwise short_form).
    Only connections with `r.weight[0]` strictly greater than `weight` are
    returned, as a pandas DataFrame.
    """
    # Note - chose not to do this with class expressions to avoid poor performance and blowing up results.
    # This might be confusing though, given the behavior of other, similar methods.
    # Might be better to refactor to work out if query is class expression or class & funnel query method
    # accordingly.
    qprop = 'label' if query_by_label else 'short_form'
    match_clause = ("MATCH (up:Class:Neuron)<-[:SUBCLASSOF|INSTANCEOF*..]-(n1:Neuron:Individual)"
                    "-[r:synapsed_to]->(n2:Neuron:Individual)-[:SUBCLASSOF|INSTANCEOF*..]->(down:Class:Neuron) "
                    "WHERE r.weight[0] > %d " % weight)
    type_filter = 'AND up.%s = "%s" and down.%s = "%s" ' % (qprop, upstream_type, qprop, downstream_type)
    return_clause = ("RETURN n1.short_form as upstream_neuron_id, n1.label as upstream_neuron_name, "
                     "r.weight[0] as weight, "
                     "n2.short_form as downstream_neuron_id, n2.label as downstream_neuron_name")
    result = self.nc.commit_list([match_clause + type_filter + return_clause])
    return pd.DataFrame.from_records(dict_cursor(result))

def get_vfb_link(self, short_forms: list, template):
    """Build a VFB browser link that loads the given individuals on `template`.

    Args:
        short_forms: VFB IDs (short_forms) of individuals. The last ID is
            used as the `id=` target; the remaining IDs follow `template`
            in the `i=` list. The argument is not modified.
        template: Template identifier placed first in the `i=` list.

    Returns:
        The link as a string, starting with `self.vfb_base`.

    Raises:
        IndexError: If `short_forms` is empty.
    """
    # Work on a copy so the caller's list is left intact.
    ids = list(short_forms)
    if not ids:
        raise IndexError("short_forms must contain at least one ID")
    focus = ids.pop()
    return self.vfb_base + focus + "&i=" + template + ',' + ','.join(ids)

def get_images_by_type(self, class_expression, template, image_folder,
                       image_type='swc', query_by_label=True, direct=False):
    """Retrieve images of instances of `class_expression` registered to `template`.

    Images are saved to disk, along with manifest and references, in
    `image_folder`. Default image type = swc. Also supported: obj, nrrd,
    rds, wlz. Returns manifest.

    Note that `class_expression` is passed to the OWL query as given; no
    quotes are added here.
    """
    instance_iris = self.oc.get_instances(class_expression,
                                          query_by_label=query_by_label,
                                          direct=direct)
    short_forms = list(map(gen_short_form, instance_iris))
    return self.neo_query_wrapper.get_images(short_forms,
                                             template=template,
                                             image_folder=image_folder,
                                             image_type=image_type)







Expand Down
64 changes: 60 additions & 4 deletions src/vfb_connect/neo/neo4j_tools.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3

import os
import requests
import json
import warnings
Expand All @@ -12,8 +12,8 @@
import pkg_resources
from ..default_servers import get_default_servers
from inspect import getfullargspec


from jsonpath_rw import parse as parse_jpath
import pandas as pd

def cli_credentials():
"""Parses command line credentials for Neo4J rest connection;
Expand Down Expand Up @@ -296,12 +296,49 @@ def _query(self, q):
else:
return r

def get_images(self, short_forms, template, image_folder, image_type='swc'):
    """Download images of the given instances and write a manifest.

    Given an array of `short_forms` for instances, find all images of the
    specified `image_type` registered to `template`. Save these to
    `image_folder` along with a manifest.tsv.

    Args:
        short_forms: short_form IDs of the instances to look up.
        template: Label of the template the image must be registered to.
        image_folder: Directory to create and write into. It is created
            with `os.mkdir`, so it must not already exist.
        image_type: File extension of the volume to download.

    Returns:
        The manifest as a pandas DataFrame, one record per saved file.

    Raises:
        FileExistsError: If `image_folder` already exists.
    """
    # TODO - make image type into array
    image_expr = parse_jpath("$.channel_image.[*].image")
    manifest = []
    os.mkdir(image_folder)
    inds = self.get_anatomical_individual_TermInfo(short_forms=short_forms)
    for ind in inds:
        if 'has_image' not in ind['term']['core']['types']:
            continue
        label = ind['term']['core']['label']
        for match in image_expr.find(ind):
            image = match.value
            if image['template_anatomy']['label'] != template:
                continue
            r = requests.get(image['image_folder'] + '/volume.' + image_type)
            # Slightly dodgy warning - could mask network errors
            if not r.ok:
                warnings.warn("No '%s' file found for '%s'." % (image_type, label))
                continue
            filename = re.sub(r'\W', '_', label) + '.' + image_type
            # Write the raw bytes: decoding to text and re-encoding would
            # corrupt binary formats such as nrrd, wlz or rds.
            with open(os.path.join(image_folder, filename), 'wb') as image_file:
                image_file.write(r.content)
            manifest.append(_populate_manifest(instance=ind, filename=filename))
    manifest_df = pd.DataFrame.from_records(manifest)
    manifest_df.to_csv(os.path.join(image_folder, 'manifest.tsv'), sep='\t')
    return manifest_df

def get_dbs(self):
    """Return the short_form of every Individual node labelled 'Site' or 'API'."""
    cypher = ("MATCH (i:Individual) "
              "WHERE 'Site' in labels(i) OR 'API' in labels(i)"
              "return i.short_form")
    short_forms = []
    for row in self._query(cypher):
        short_forms.append(row['i.short_form'])
    return short_forms

def get_templates(self):
    """Return TermInfo (query type 'Individual') for every template individual.

    Templates are the nodes matched by `(i:Individual:Template:Anatomy)`.
    """
    cypher = ("MATCH (i:Individual:Template:Anatomy) "
              "RETURN i.short_form")
    template_ids = [row['i.short_form'] for row in self._query(cypher)]
    return self._get_TermInfo(template_ids, typ='Individual')

def vfb_id_2_xrefs(self, vfb_id, db='', id_type='', reverse_return=False):
"""Map a list of node short_form IDs in VFB to external DB IDs
Args:
Expand Down Expand Up @@ -395,7 +432,7 @@ def get_TermInfo(self, short_forms):
out.extend(self.get_DataSet_TermInfo([e['short_form']]))
return out

def _get_TermInfo(self, short_forms: list, typ, show_query=True):
def _get_TermInfo(self, short_forms: list, typ, show_query=False):
sfl = "', '".join(short_forms)
qs = Template(self.queries[typ]).substitute(ID=sfl)
if show_query:
Expand All @@ -421,3 +458,22 @@ def get_template_TermInfo(self, short_forms):
return self._get_TermInfo(short_forms, typ='Get JSON for Template')


def _populate_instance_summary_tab(instance):
    """Flatten one instance TermInfo record into a summary dict.

    The result has the keys label, url, id, tags (taken from
    `instance['term']['core']`) followed by parents, license and dataset
    (lists of the values matched by the corresponding JSONPath expressions).
    """
    core = instance['term']['core']
    summary = {
        'label': core['label'],
        'url': core['iri'],
        'id': core['short_form'],
        'tags': core['types'],
    }
    # Multi-valued fields, in the order they appear in the summary.
    list_fields = (
        ('parents', "$.parents[*].symbol,label,short_form"),
        ('license', "$.dataset_license.[*].license.link"),
        ('dataset', "$.dataset_license.[*].dataset.core.iri"),
    )
    compiled = [(key, parse_jpath(path)) for key, path in list_fields]
    for key, expr in compiled:
        summary[key] = [match.value for match in expr.find(instance)]
    return summary


def _populate_manifest(filename, instance):
    """Return the instance summary for `instance` with a 'filename' entry added."""
    row = _populate_instance_summary_tab(instance)
    row.update(filename=filename)
    return row
38 changes: 35 additions & 3 deletions src/vfb_connect/test/cross_server_tools_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import unittest
from ..cross_server_tools import VfbConnect
import os
import shutil


class VfbConnectTest(unittest.TestCase):
Expand All @@ -15,10 +17,40 @@ def test_get_subclasses(self):
self.assertTrue(
self.vc.get_subclasses("fan-shaped body layer"))

def test_get_images(self):
def test_get_instances(self):
    """get_instances returns results, including for a large (batched) query."""
    self.assertTrue(
        self.vc.get_instances("fan-shaped body"))
    # Tests batched query
    self.assertTrue(
        len(self.vc.get_instances('antennal lobe projection neuron')) > 1000)

def test_get_images(self):
    """Downloading images for explicit IDs yields a non-empty manifest."""
    tmp_dir = 'image_folder_tmp'
    # get_images creates the folder itself, so clear any leftover first.
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    manifest = self.vc.neo_query_wrapper.get_images(['VFB_00000100', 'VFB_0010129x'],
                                                    image_folder=tmp_dir,
                                                    template='JRC2018Unisex')
    self.assertTrue(len(manifest))

def test_get_images_by_type(self):
    """Downloading images by class expression yields a non-empty manifest."""
    tmp_dir = 'image_folder_tmp'
    # The image folder is created by the call, so clear any leftover first.
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)
    manifest = self.vc.get_images_by_type("'fan-shaped neuron F1'",
                                          image_folder=tmp_dir,
                                          template='JRC2018Unisex')
    self.assertTrue(len(manifest) > 0)

def test_get_downstream_neurons(self):
    """A known neuron has Kenyon cell partners downstream at weight 20."""
    partners = self.vc.get_neurons_downstream_of(
        'D_adPN_R - 5813055184',
        classification="'Kenyon cell'",
        weight=20,
    )
    self.assertTrue(len(partners) > 0)

def test_get_upstream_neurons(self):
    """A known neuron has GABAergic partners upstream at weight 20."""
    # The classification is a class expression, so the entity label is
    # single-quoted, as in the downstream test. An expression that resolves
    # to no instances is not applied as a filter, which would let this test
    # pass without exercising the classification at all.
    fu = self.vc.get_neurons_upstream_of('D_adPN_R - 5813055184', classification="'GABAergic neuron'", weight=20)
    self.assertTrue(len(fu) > 0)

def test_get_connected_neurons_by_type(self):
    """Kenyon cells connect to mushroom body output neurons above weight 20."""
    connections = self.vc.get_connected_neurons_by_type(
        'Kenyon cell',
        'mushroom body output neuron',
        20,
    )
    self.assertTrue(len(connections) > 0)

def tearDown(self):
    """Remove the temporary image folder left behind by the image tests."""
    tmp_dir = 'image_folder_tmp'
    # isdir is already False for a missing path, so one check suffices.
    if os.path.isdir(tmp_dir):
        shutil.rmtree(tmp_dir)

0 comments on commit 45a8498

Please sign in to comment.