Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update to the docs branch #24

Merged
merged 5 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions gssnng/score_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,15 @@ def run_gssnng(

samp_neighbors = None
error_checking(mat, samp_neighbors, recompute_neighbors,
gs_obj, score_method, ranked, method_params)
gs_obj, score_method, ranked, method_params, 0)

if method_params == None:
method_params = dict()

# score each cell with the list of gene sets
all_scores = _proc_data(mat, gs_obj, groupby, smooth_mode, recompute_neighbors,
score_method, method_params, samp_neighbors,
noise_trials, ranked, cores)
noise_trials, ranked, cores, 0)

# warning: the all_scores rows might have a different order!
# make sure to resort them according to the mat.obs.index
Expand Down Expand Up @@ -154,15 +154,15 @@ def with_gene_sets(

samp_neighbors = None
error_checking(adata, samp_neighbors, recompute_neighbors,
gs_obj, score_method, ranked, method_params)
gs_obj, score_method, ranked, method_params, 0)

if method_params == None:
method_params = dict()

# score each cell with the list of gene sets
all_scores = _proc_data(adata, gs_obj, groupby, smooth_mode, recompute_neighbors,
score_method, method_params, samp_neighbors,
noise_trials, ranked, cores)
noise_trials, ranked, cores, 0)
## join in new results
adata.obs = adata.obs.join(all_scores, how='left')

Expand Down Expand Up @@ -229,7 +229,8 @@ def _proc_data(
samp_neighbors: int,
noise_trials: int,
ranked: bool,
cores: int
cores: int,
return_data: int
):
"""
In many cases, the neighbors should be defined. If you have mixed clinical endpoints,
Expand All @@ -247,6 +248,7 @@ def _proc_data(
:param noise_trials: number of noisy samples to create, integer
:param ranked: whether the gene expression counts should be rank ordered
:param cores: number of parallel processes to work through groupby groups
:param return_data: if 1, return the smoothed data list right after it is built and skip scoring

:returns: scores in a dict for each cell in a list.
"""
Expand Down Expand Up @@ -284,6 +286,9 @@ def _proc_data(
data_list = _build_data_list(adata, groupby, cats, recompute_neighbors, samp_neighbors, smooth_mode)
# then we can start scoring cells #

if return_data == 1:
return(data_list)

# building up the argument list for the parallel call of _score_all_cells_all_sets
arglist = []
for smoothed_adata, groupname in data_list:
Expand Down
52 changes: 52 additions & 0 deletions gssnng/smooth_anndatas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import anndata
from gssnng.score_cells import _proc_data
from gssnng.util import error_checking
from typing import Union

def smooth_anndata(
        adata: anndata.AnnData,
        groupby: Union[str, list, dict],
        smooth_mode: str,
        recompute_neighbors: int,
        method_params: dict,
        cores: int
    ) -> list:

    """
    nearest neighbor smoothing of the expression matrix

    Runs the same preparation step used for scoring (via `_proc_data`) but
    asks it to hand back the smoothed data instead of going on to score
    gene sets.

    :param adata
        anndata.AnnData containing the cells to be smoothed
    :param groupby
        either a column label in adata.obs, and all categories taken, or a dict specifies one group.
    :param smooth_mode
        `adjacency` or `connectivity`, which representation of the neighborhood graph to use.
        `adjacency` weights all neighbors equally, `connectivity` weights close neighbors more
    :param recompute_neighbors
        should neighbors be recomputed within each group, 0 for no, >0 for yes and specifies N
    :param method_params
        specific params for each method. `None` is replaced by an empty dict.
    :param cores
        number of parallel processes to work through groupby groups

    :returns: the data list produced by `_proc_data` when `return_data` is 1.
        NOTE(review): the scoring path iterates this list as
        (smoothed_adata, groupname) pairs -- confirm against `_build_data_list`.
    """

    return_data = 1        # make _proc_data return its data list
    noise_trials = 0       # not used currently
    samp_neighbors = None  # also not used
    just_smoothing = 1     # forwarded as error_checking's last argument

    # No gene sets, score method or ranking are involved when only smoothing,
    # so those arguments are passed as None.
    error_checking(adata, samp_neighbors, recompute_neighbors,
                   None, None, None, method_params, just_smoothing)

    if method_params is None:
        method_params = dict()

    # Build the smoothed data only; gene-set object, score method and
    # ranked flag are not needed and are passed as None.
    data_list = _proc_data(adata, None, groupby, smooth_mode, recompute_neighbors,
                           None, method_params, samp_neighbors,
                           noise_trials, None, cores, return_data)

    print("**done**")
    return data_list
29 changes: 29 additions & 0 deletions gssnng/test/test_return_smoothed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
if __name__ == '__main__':

    import scanpy as sc
    from gssnng.smooth_anndatas import smooth_anndata
    import time

    def test_return_smoothed(adata):
        """Smooth `adata` grouped by 'louvain' and return what smooth_anndata gives back."""
        return smooth_anndata(
            adata=adata,
            groupby='louvain',
            smooth_mode='adjacency',
            recompute_neighbors=32,
            method_params={},
            cores=4,
        )

    def test_score_all_sets():
        """Time one smoothing run on scanpy's pbmc3k_processed dataset and print the result length."""
        pbmc = sc.datasets.pbmc3k_processed()
        started = time.time()
        print(f'start time: {started}')
        smoothed = test_return_smoothed(pbmc)
        print('******DONE*******')
        finished = time.time()
        print(f'end time: {finished}')
        print(f'TOTAL TIME: {finished - started}')
        print(len(smoothed))

    # Only runs when the file is executed directly as a script.
    test_score_all_sets()
    print('test done')
34 changes: 19 additions & 15 deletions gssnng/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def error_checking(
gs_obj,
score_method,
ranked,
method_params
method_params,
just_smoothing
):
"""
QC on the adata. Need to make sure there's enough neighbors available given the sampling size.
Expand All @@ -23,30 +24,33 @@ def error_checking(
:param samp_neighbors: integer, number of neighbors to sample
"""

if type(method_params) != type(dict()):
raise Exception('ERROR: please use a dictionary to pass method params')

if any([xi in adata.obs.columns for xi in gs_obj.get_gs_names()]):
#raise Exception('ERROR: gene set names in columns of adata.obs, please drop.')
print("Warning! Dropping gene set names from obs!")
genesetlist = [x.name for x in gs_obj.set_list]
for gsi in genesetlist:
print('dropping: ' + gsi)
adata.obs.drop(columns=[gsi], inplace=True)

if 'gssnng_groupby' in adata.obs.columns:
adata.obs.drop(columns='gssnng_groupby', inplace=True)
#raise Exception("Error: please drop 'gssnng_groupby' as a column name.")
print('... and dropping gssnng_groupby column...')

if ranked == False and score_method == 'singscore':
raise Exception('ERROR: singscore requires ranked data, set ranked parameter to True')

if (recompute_neighbors == None) or (recompute_neighbors == 0):
n_neighbors = adata.uns['neighbors']['params']['n_neighbors'] #[0]# in older AnnData versions need this??
else:
n_neighbors = recompute_neighbors

if just_smoothing == 0:
# then do all other checks
if type(method_params) != type(dict()):
raise Exception('ERROR: please use a dictionary to pass method params')

if any([xi in adata.obs.columns for xi in gs_obj.get_gs_names()]):
#raise Exception('ERROR: gene set names in columns of adata.obs, please drop.')
print("Warning! Dropping gene set names from obs!")
genesetlist = [x.name for x in gs_obj.set_list]
for gsi in genesetlist:
print('dropping: ' + gsi)
adata.obs.drop(columns=[gsi], inplace=True)

if ranked == False and score_method == 'singscore':
raise Exception('ERROR: singscore requires ranked data, set ranked parameter to True')


#if n_neighbors < samp_neighbors:
# print('*******')
# print('WARNING: Number of neighbors too low for sampling parameter!')
Expand Down
Loading