Merge branch 'master' into pubs_query_fix
dosumis authored Sep 5, 2024
2 parents 39af777 + d962e47 commit db7112e
Showing 4 changed files with 64 additions and 10 deletions.
2 changes: 2 additions & 0 deletions requirements.txt
@@ -18,3 +18,5 @@ jsonpath_rw
bottleneck<=1.3.6
matplotlib>3.9
seaborn>0.13
fonttools>=4.43.0 # not directly required, pinned by Snyk to avoid a vulnerability
pillow>=10.3.0 # not directly required, pinned by Snyk to avoid a vulnerability
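
A quick, hedged way to confirm the new pins are honoured in an installed environment (not part of the commit; assumes the standard-library importlib.metadata plus the separately installed packaging distribution):

# Illustrative check only, not part of this commit: confirm the Snyk-pinned
# transitive dependencies resolve to compliant versions in the current environment.
from importlib.metadata import version
from packaging.version import Version  # assumes `packaging` is installed

assert Version(version("fonttools")) >= Version("4.43.0")
assert Version(version("pillow")) >= Version("10.3.0")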
2 changes: 1 addition & 1 deletion src/vfb_connect.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: vfb_connect
Version: 2.2.5.dev6+8aacf1a
Version: 2.2.7.dev7+f46d531.dirty
Summary: Wrapper for querying VirtualFlyBrain servers.
Home-page: https://github.com/VirtualFlyBrain/VFB_connect
Author: David Osumi-Sutherland
68 changes: 60 additions & 8 deletions src/vfb_connect/cross_server_tools.py
@@ -95,6 +95,7 @@ def __init__(self, neo_endpoint=get_default_servers()['neo_endpoint'],
self.nc = Neo4jConnect(**connections['neo'])
self.neo_query_wrapper = QueryWrapper(**connections['neo'])
self.cache_file = self.get_cache_file_path()
self._dbs_cache = {}
self.lookup = self.nc.get_lookup(cache=self.cache_file)
self.normalized_lookup = self.preprocess_lookup()
self.reverse_lookup = {v: k for k, v in self.lookup.items()}
@@ -699,10 +700,16 @@ def get_transcriptomic_profile(self, cell_type, gene_type=False, no_subtypes=Fal
"OPTIONAL MATCH (sa)-[:overlaps]->(tis:Class:Anatomy) "
"OPTIONAL MATCH (clus)-[:has_source]->(ds:DataSet:Individual) "
"OPTIONAL MATCH (ds)-[:has_reference]->(p:pub:Individual) "
"OPTIONAL MATCH (ds)-[dbx:database_cross_reference]->(s:Site:Individual) "
"OPTIONAL MATCH (ds)-[dbxw:database_cross_reference]->(sw:Site:Individual "
"{short_form:'scExpressionAtlas'}) "
"OPTIONAL MATCH (ds)-[dbxd:database_cross_reference]->(sd:Site:Individual "
"{short_form:'scExpressionAtlasFTP'}) WHERE dbxd.accession[0] = dbxw.accession[0] "
"RETURN DISTINCT c2.label AS cell_type, c2.short_form AS cell_type_id, "
"sex.label AS sample_sex, COLLECT(tis.label) AS sample_tissue, "
"p.miniref[0] as ref, g.label AS gene, g.short_form AS gene_id, "
"ds.short_form AS dataset_id, p.miniref[0] as ref, "
"sw.link_base[0] + dbxw.accession[0] AS website_linkout, "
"sd.link_base[0] + dbxd.accession[0] + sd.postfix[0] AS download_linkout, "
"g.label AS gene, g.short_form AS gene_id, "
"apoc.coll.subtract(labels(g), ['Class', 'Entity', 'hasScRNAseq', 'Feature', 'Gene']) AS function, "
"e.expression_extent[0] as extent, toFloat(e.expression_level[0]) as level "
"ORDER BY cell_type, g.label" % (gene_label, cell_type_short_form, equal_condition))
@@ -907,15 +914,56 @@ def vfb_id_2_xrefs(self, vfb_id: iter, db='', id_type='', reverse_return=False):
"""
return self.neo_query_wrapper.vfb_id_2_xrefs(vfb_id=vfb_id, db=db, id_type=id_type, reverse_return=reverse_return)

def get_dbs(self, include_symbols=True):
"""Get all external databases in the database.
def get_dbs(self, include_symbols=True, data_sources_only=True, verbose=False):
"""Get all external databases in the database, optionally filtering by data sources and including symbols.
:return: List of external databases in the database.
:param include_symbols: If True, include the symbols of the databases.
:type include_symbols: bool
:param data_sources_only: If True, only include databases where is_data_source=True.
:type data_sources_only: bool
:return: List of external databases and optionally their symbols.
:rtype: list
"""
if not self._dbs:
self._dbs = self.neo_query_wrapper.get_dbs(include_symbols=include_symbols)
return self._dbs
# Create a cache key based on the options to ensure unique cache for each option set
cache_key = (include_symbols, data_sources_only)

# Check if the result is already cached
if cache_key in self._dbs_cache and self._dbs_cache[cache_key]:
print("Returning cached results") if verbose else None
return self._dbs_cache[cache_key]

print("Querying for external database ids") if verbose else None
# Base query to get all databases, filtering for data sources if needed
query = "MATCH (i:Individual) "
if data_sources_only:
query += "WHERE i.is_data_source=[True] AND (i:Site OR i:API) "
else:
query += "WHERE i:Site OR i:API "
query += "RETURN i.short_form as id"

# Execute the query
print("Querying for external database ids:", query) if verbose else None
results = self.cypher_query(query, return_dataframe=False, verbose=verbose)
dbs = [d['id'] for d in results]

# Optionally include symbols
if include_symbols:
print("Querying for external database symbols") if verbose else None
symbol_query = "MATCH (i:Individual) "
if data_sources_only:
symbol_query += "WHERE i.is_data_source=[True] AND (i:Site OR i:API) "
else:
symbol_query += "WHERE i:Site OR i:API "
symbol_query += "AND exists(i.symbol) AND not i.symbol[0] = '' RETURN i.symbol[0] as symbol"

print("Querying for external database symbols:",symbol_query) if verbose else None
symbol_results = self.cypher_query(symbol_query, return_dataframe=False, verbose=verbose)
dbs.extend([d['symbol'] for d in symbol_results])

# Cache the results for this combination of parameters
self._dbs_cache[cache_key] = dbs

return dbs
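
A hedged usage sketch of the reworked method; the class name VfbConnect and its default constructor are assumptions here, not shown in this diff:

# Usage sketch under stated assumptions: the class in cross_server_tools.py is
# importable as VfbConnect and connects to the default VFB servers.
from vfb_connect.cross_server_tools import VfbConnect

vfb = VfbConnect()
dbs = vfb.get_dbs(include_symbols=True, data_sources_only=True, verbose=True)
# A repeat call with the same arguments is answered from self._dbs_cache under
# the (include_symbols, data_sources_only) key instead of re-querying Neo4j.
assert vfb.get_dbs(include_symbols=True, data_sources_only=True) == dbs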

def get_scRNAseq_expression(self, id, query_by_label=True, return_id_only=False, return_dataframe=True, verbose=False):
"""
@@ -1087,9 +1135,13 @@ def cypher_query(self, query, return_dataframe=True, verbose=False):
:return: A DataFrame or list of results.
:rtype: pandas.DataFrame or list of dicts
"""
print(f"Running query: {query}") if verbose else None
r = self.nc.commit_list([query])
print(r) if verbose else None
dc = dict_cursor(r)
print(dc) if verbose else None
if return_dataframe:
print("Returning DataFrame") if verbose else None
return pd.DataFrame.from_records(dc)
return dc

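A hedged sketch of calling the instrumented cypher_query, reusing the hypothetical vfb instance from the example above; with verbose=True the added prints surface the query, the raw commit response, the dict cursor, and the return path:

# Sketch only, reusing the hypothetical `vfb` instance from the earlier example.
df = vfb.cypher_query(
    "MATCH (i:Individual) WHERE i:Site OR i:API RETURN i.short_form AS id LIMIT 5",
    return_dataframe=True,
    verbose=True,  # prints the query, raw response, dict cursor, and return path
)
rows = vfb.cypher_query(
    "MATCH (i:Individual) WHERE i:Site OR i:API RETURN i.short_form AS id LIMIT 5",
    return_dataframe=False,  # list of dicts rather than a pandas DataFrame
)
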
2 changes: 1 addition & 1 deletion src/vfb_connect/schema/vfb_term.py
@@ -3016,7 +3016,7 @@ def __init__(self, terms: Union[List[VFBTerm], List[str], pandas.core.frame.Data
print(f"Changing {len(terms)} term names to ids") if verbose else None
terms = [self.vfb.lookup_id(term) for term in terms if term]
if self.vfb._load_limit and len(terms) > self.vfb._load_limit:
print(f"More thann the load limit of {self.vfb._load_limit} requested. Loading first {self.vfb._load_limit} terms out of {len(terms)}")
print(f"More than the load limit of {self.vfb._load_limit} requested. Loading first {self.vfb._load_limit} terms out of {len(terms)}")
terms = terms[:self.vfb._load_limit]
print(f"Pulling {len(terms)} terms from VFB...")
json_list = self.vfb.get_TermInfo(terms, summary=False, verbose=verbose, query_by_label=query_by_label)
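A hedged, self-contained illustration of the truncation guard behind the corrected message, using hypothetical ids and limit:

# Illustration with hypothetical values: `load_limit` stands in for self.vfb._load_limit.
load_limit = 10
terms = [f"FBbt_{i:08d}" for i in range(25)]  # hypothetical term ids
if load_limit and len(terms) > load_limit:
    print(f"More than the load limit of {load_limit} requested. "
          f"Loading first {load_limit} terms out of {len(terms)}")
    terms = terms[:load_limit]
assert len(terms) == load_limit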
