Skip to content

Commit

Permalink
Update preprocess
Browse files Browse the repository at this point in the history
  • Loading branch information
xin-huang committed Nov 6, 2023
1 parent da07760 commit 85994b5
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 9 deletions.
7 changes: 4 additions & 3 deletions sstar/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ def preprocess_worker(in_queue, out_queue, **kwargs):
pvt_mut_nums = cal_pvt_mut_num(sub_ref_gts, sub_tgt_gts)
items['pvt_mut_nums'] = pvt_mut_nums
if ('individual allele frequency spectra' in kwargs['features'].keys()) and (kwargs['features']['individual allele frequency spectra']['output'] is True):
spectra = cal_n_ton(tgt_gts)
if kwargs['features']['genotypes']['phased'] is True: ploidy = 1
else: ploidy = kwargs['features']['genotypes']['ploidy']
spectra = cal_n_ton(tgt_gts, ploidy=ploidy)
items['spectra'] = spectra
if 'pairwise distances' in kwargs['features'].keys():
if ('reference' in kwargs['features']['pairwise distances'].keys()) and (kwargs['features']['pairwise distances']['reference']['output'] is True):
Expand Down Expand Up @@ -137,8 +139,7 @@ def _create_header(ref_samples, tgt_samples, features, output_genotypes):
if ('sstar' in features.keys()) and (features['sstar']['output'] is True): header += "\tS*_score"
if ('number of private mutations' in features.keys()) and (features['number of private mutations']['output'] is True): header += "\tprivate_SNP_num"
if ('individual allele frequency spectra' in features.keys()) and (features['individual allele frequency spectra']['output'] is True):
if features['genotypes']['phased'] is True: nsample = len(tgt_samples)*features['genotypes']['ploidy']
else: nsample = len(tgt_samples)
nsample = len(tgt_samples)*features['genotypes']['ploidy']
for i in range(nsample+1): header += f'\t{i}_ton'
if ('pairwise distances' in features.keys()):
if ('reference' in features['pairwise distances'].keys()) and (features['pairwise distances']['reference']['output'] is True):
Expand Down
13 changes: 7 additions & 6 deletions sstar/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,22 @@
from scipy.spatial import distance_matrix


def cal_n_ton(tgt_gt):
def cal_n_ton(tgt_gt, ploidy):
"""
Description:
Calculates individual frequency spetra for haplotypes.
Calculates individual frequency spetra for samples.
Arguments:
tgt_gt numpy.ndarray: Genotype matrix from the target population.
ploidy int: Ploidy of the genomes.
Returns:
spectra numpy.ndarray: Individual frequency spectra for haplotypes.
"""
mut_num, hap_num = tgt_gt.shape
iv = np.ones((hap_num, 1))
counts = tgt_gt*np.matmul(tgt_gt, iv)
spectra = np.array([np.bincount(counts[:,idx].astype('int8'), minlength=hap_num+1) for idx in range(hap_num)])
mut_num, sample_num = tgt_gt.shape
iv = np.ones((sample_num, 1))
counts = (tgt_gt>0)*np.matmul(tgt_gt, iv)
spectra = np.array([np.bincount(counts[:,idx].astype('int8'), minlength=sample_num*ploidy+1) for idx in range(sample_num)])
# ArchIE does not count non-segragating sites
#spectra[:,0] = 0

Expand Down

0 comments on commit 85994b5

Please sign in to comment.