Skip to content

Commit

Permalink
added return smoothed anndatas
Browse files Browse the repository at this point in the history
  • Loading branch information
Gibbsdavidl committed Jan 3, 2024
1 parent 9605ed3 commit 772664c
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 1 deletion.
7 changes: 6 additions & 1 deletion gssnng/score_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ def _proc_data(
samp_neighbors: int,
noise_trials: int,
ranked: bool,
cores: int
cores: int,
return_data: int
):
"""
In many cases, the neighbors should be defined. If you have mixed clinical endpoints,
Expand All @@ -247,6 +248,7 @@ def _proc_data(
:param noise_trials: number of noisy samples to create, integer
:param ranked: whether the gene expression counts should be rank ordered
:param cores: number of parallel processes to work through groupby groups
:param return_data: should the smoothed data list be returned?
:returns: scores in a dict for each cell in a list.
"""
Expand Down Expand Up @@ -284,6 +286,9 @@ def _proc_data(
data_list = _build_data_list(adata, groupby, cats, recompute_neighbors, samp_neighbors, smooth_mode)
# then we can start scoring cells #

if return_data == 1:
return(data_list)

# building up the argument list for the parallel call of _score_all_cells_all_sets
arglist = []
for smoothed_adata, groupname in data_list:
Expand Down
55 changes: 55 additions & 0 deletions gssnng/smooth_anndatas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import anndata
from gssnng.score_cells import _proc_data
#from gssnng.util import error_checking
from typing import Union

def smooth_anndata(
        adata: anndata.AnnData,
        groupby: Union[str, list, dict],
        smooth_mode: str,
        recompute_neighbors: int,
        method_params: Union[dict, None],
        cores: int
) -> list:
    """
    Nearest neighbor smoothing of the expression matrix, without gene set scoring.

    Calls `_proc_data` with `return_data` set to 1, so the smoothed data list is
    handed back as soon as it has been built instead of going on to score cells.
    No gene set object, score method, neighbor sampling or ranking option is
    supplied (all are passed as None).

    :param adata
        anndata.AnnData containing the cells to be smoothed
    :param groupby
        either a column label in adata.obs, and all categories taken, or a dict specifies one group.
    :param smooth_mode
        `adjacency` or `connectivity`, which representation of the neighborhood graph to use.
        `adjacency` weights all neighbors equally, `connectivity` weights close neighbors more
    :param recompute_neighbors
        should neighbors be recomputed within each group, 0 for no, >0 for yes and specifies N
    :param method_params
        forwarded unchanged to `_proc_data`; None is replaced by an empty dict.
    :param cores
        forwarded to `_proc_data` (documented there as the number of parallel processes).
        NOTE(review): the early return for `return_data == 1` appears to happen before the
        parallel scoring call is assembled, so this may have no effect here -- confirm.
    :returns: the data list produced by `_proc_data`; the scoring path iterates it as
        (smoothed_adata, groupname) pairs, one per group.
    """

    # Fixed settings for the "smooth only" path. Named so the positional call
    # below is readable; the values are the same ones passed previously.
    return_data = 1        # ask _proc_data to return the smoothed data list
    noise_trials = 0       # not used currently
    samp_neighbors = None  # no neighbor sub-sampling
    gs_obj = None          # no gene sets: nothing is scored on this path
    score_method = None
    ranked = None

    # TODO: input validation (error_checking) is not wired up for this entry point yet.

    # Identity check: `== None` can be hijacked by objects overriding __eq__.
    if method_params is None:
        method_params = dict()

    # build the smoothed data for each group (scoring is skipped via return_data)
    data_list = _proc_data(adata, gs_obj, groupby, smooth_mode, recompute_neighbors,
                           score_method, method_params, samp_neighbors,
                           noise_trials, ranked, cores, return_data)

    print("**done**")
    return data_list
29 changes: 29 additions & 0 deletions gssnng/test/test_return_smoothed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
if __name__ == '__main__':
    # Manual smoke test: everything lives under the main guard, so it only
    # runs when this file is executed directly as a script.

    import time

    import scanpy as sc
    from gssnng.smooth_anndatas import smooth_anndata

    def test_return_smoothed(adata):
        """Run smooth_anndata on `adata` with fixed settings and return its result."""
        return smooth_anndata(
            adata=adata,
            groupby='louvain',
            smooth_mode='adjacency',
            recompute_neighbors=32,
            method_params={},
            cores=4,
        )

    def test_score_all_sets():
        """Smooth the pbmc3k_processed dataset, printing timings and the result length."""
        dataset = sc.datasets.pbmc3k_processed()

        started = time.time()
        print(f'start time: {started}')

        smoothed = test_return_smoothed(dataset)
        print('******DONE*******')

        finished = time.time()
        print(f'end time: {finished}')
        print(f'TOTAL TIME: {finished - started}')

        # number of entries in the returned data list
        print(len(smoothed))

    test_score_all_sets()
    print('test done')

0 comments on commit 772664c

Please sign in to comment.