diff --git a/sstar/archie_infer.py b/sstar/archie_infer.py index 1b849a4..3a455d5 100644 --- a/sstar/archie_infer.py +++ b/sstar/archie_infer.py @@ -19,17 +19,17 @@ def main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut ref_ind_file = str(demo_model_file) + "_new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" tgt_ind_file = str(demo_model_file) + "_new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" - #ref_ind_file = os.path.join("config", "simulation", "nref_" + str(nref), "ntgt_" + str(ntgt), "ref.scr1.list") - #tgt_ind_file = os.path.join("config", "simulation", "nref_" + str(nref), "ntgt_" + str(ntgt), "sim.src1.list") scikitfile = output_prefix + ".scikit.pickle" statsmodelsfile = output_prefix + ".statsmodels.pickle" + #get all folders for prediction final_folders = infer.get_all_folders(model_name, os.path.join("nref_" + str(nref), "ntgt_" + str(ntgt))) sample_name = "nref_" + str(nref) + "_ntgt_" + str(ntgt) + #without ref_ and tgt_ind_file (are created within infer.predict_introgression_folders) #infer.predict_introgression_folders(nrep, nref, ntgt, seq_len, thread, output_prefix+ "test", final_folders, statsmodel=statsmodelsfile, scikitmodel=scikitfile, sample_name=sample_name, ref_ind_file=ref_ind_file, tgt_ind_file=tgt_ind_file, model_name=model_name, drop_dynamic_cols=False, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, win_step_50k = False) - #without ref_ and tgt_ind_file (are created within infer) + infer.predict_introgression_folders(nrep, nref, ntgt, seq_len, thread, output_prefix+ "test", final_folders, statsmodel=statsmodelsfile, scikitmodel=scikitfile, sample_name=sample_name, ref_ind_file=ref_ind_file, tgt_ind_file=tgt_ind_file, model_name=model_name, drop_dynamic_cols=False, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, win_step_50k = False) @@ -73,5 +73,4 @@ def main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut seed = int(args.seed) model_name=args.model_name - main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed,model_name) diff --git a/sstar/archie_infer_haplotypes.py b/sstar/archie_infer_haplotypes.py deleted file mode 100644 index 079365d..0000000 --- a/sstar/archie_infer_haplotypes.py +++ /dev/null @@ -1,77 +0,0 @@ -import sstar -import utils -import os -import demes -import numpy as np -import pandas as pd -from scipy.stats import norm -from scipy.stats import nbinom - -import train -import infer -import preprocess - -import argparse, os, sys, signal - -#def main(args=None): -def main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed, model_name): - - ref_ind_file = str(demo_model_file) + "_new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" - tgt_ind_file = str(demo_model_file) + "_new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" - - #ref_ind_file = os.path.join("config", "simulation", "nref_" + str(nref), "ntgt_" + str(ntgt), "ref.scr1.list") - #tgt_ind_file = os.path.join("config", "simulation", "nref_" + str(nref), "ntgt_" + str(ntgt), "sim.src1.list") - scikitfile = output_prefix + ".scikit.pickle" - statsmodelsfile = output_prefix + ".statsmodels.pickle" - - final_folders = infer.get_all_folders(model_name, os.path.join("nref_" + str(nref), "ntgt_" + str(ntgt))) - - sample_name = "nref_" + 
str(nref) + "_ntgt_" + str(ntgt) - - #infer.predict_introgression_folders(nrep, nref, ntgt, seq_len, thread, output_prefix+ "test", final_folders, statsmodel=statsmodelsfile, scikitmodel=scikitfile, sample_name=sample_name, ref_ind_file=ref_ind_file, tgt_ind_file=tgt_ind_file, model_name=model_name, drop_dynamic_cols=False, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, win_step_50k = False) - #without ref_ and tgt_ind_file (are created within infer) - infer.predict_introgression_folders(nrep, nref, ntgt, seq_len, thread, output_prefix+ "test", final_folders, statsmodel=statsmodelsfile, scikitmodel=scikitfile, sample_name=sample_name, ref_ind_file=ref_ind_file, tgt_ind_file=tgt_ind_file, model_name=model_name, drop_dynamic_cols=False, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, win_step_50k = False, use_haplotype_acc=True) - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument('--demo_model_file',type=str, required=True) - parser.add_argument('--nrep', type=int, required=True) - parser.add_argument('--nref', type=int, required=True) - parser.add_argument('--ntgt',type=int, required=True) - parser.add_argument('--ref_id',type=str, required=True) - parser.add_argument('--tgt_id', type=str, required=True) - parser.add_argument('--src_id', type=str, required=True) - parser.add_argument('--seq_len', type=int, required=True) - parser.add_argument('--mut_rate',type=float, required=True) - parser.add_argument('--rec_rate',type=float, required=True) - parser.add_argument('--thread',type=int, required=True) - parser.add_argument('--output_prefix',type=str, required=True) - parser.add_argument('--output_dir',type=str, required=True) - parser.add_argument('--seed',required=True) - parser.add_argument('--model_name',type=str,required=True) - - args = parser.parse_args() - - demo_model_file = args.demo_model_file - nrep = args.nrep - nref = args.nref - ntgt = args.ntgt - ref_id = args.ref_id - tgt_id = args.tgt_id - src_id = args.src_id - seq_len = args.seq_len - mut_rate = args.mut_rate - rec_rate = args.rec_rate - thread = args.thread - output_prefix = args.output_prefix - output_dir = args.output_dir - if args.seed == "None": - seed = None - else: - seed = int(args.seed) - model_name=args.model_name - - - main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed,model_name) diff --git a/sstar/archie_only_train.py b/sstar/archie_only_train.py deleted file mode 100644 index 7682653..0000000 --- a/sstar/archie_only_train.py +++ /dev/null @@ -1,137 +0,0 @@ -import sstar -import utils -import os -import demes -import numpy as np -import pandas as pd -from scipy.stats import norm -from scipy.stats import nbinom - -import train -import infer -import preprocess - -import argparse, os, sys, signal -import shutil - - -def main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed, folder_partitions, create_testdirs = False, train_file_name=None): - - if train_file_name == None: - train_file_name = str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile.csv" - - train_df = pd.read_csv(train_file_name) - #drop_dynamic_cols indicate whether non-fixed size features should be dropped - train_df_reduced = train_df.copy() - - train_df_no_kurtosis = train_df.copy() - train_df_no_paired_dist = train_df.copy() - - 
train_df_target_full_reduced = train_df.copy() - - - - dynamic_cols = [col for col in train_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - - no_kurtosis_cols = [col for col in train_df.columns if ('kurtosis_pairwised_dist' in col or col.startswith("pairwised_dist")) ] - - no_paired_cols = [col for col in train_df.columns if (col.startswith("pairwised_dist"))] - - full_reduced_cols = [col for col in train_df.columns if ('-ton' in col or 'pairwised_dist' in col )] - - train_df_reduced.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - train_df_no_kurtosis.drop(no_kurtosis_cols, axis=1, inplace = True, errors='ignore') - train_df_no_paired_dist.drop(no_paired_cols, axis=1, inplace = True, errors='ignore') - train_df_target_full_reduced.drop(full_reduced_cols, axis=1, inplace = True, errors='ignore') - - - - #train_df.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile.csv") - - train_df_reduced.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_fixed.csv") - - train_df_no_kurtosis.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_nokurtosis.csv") - train_df_no_paired_dist.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_nopaired.csv") - train_df_target_full_reduced.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_tgtfullreduced.csv") - - - scikit_file = output_prefix + ".scikit.pickle" - statsmodels_file = output_prefix + ".statsmodels.pickle" - scikit_file_reduced = "fixed_" + output_prefix + ".scikit.pickle" - statsmodels_file_reduced = "fixed_" + output_prefix + ".statsmodels.pickle" - - scikit_file_no_kurtosis = "nokurt_" + output_prefix + ".scikit.pickle" - statsmodels_file_no_kurtosis = "nokurt_" + output_prefix + ".statsmodels.pickle" - - scikit_file_no_paired_dist = "nopaired_" + output_prefix + ".scikit.pickle" - statsmodels_file_no_paired_dist = "nopaired_" + output_prefix + ".statsmodels.pickle" - - scikit_file_full_reduced = "fullreduced_" + output_prefix + ".scikit.pickle" - statsmodels_file_full_reduced = "fullreduced_" + output_prefix + ".statsmodels.pickle" - - scikit_file_reduced = "fixed_" + output_prefix + ".scikit.pickle" - statsmodels_file_reduced = "fixed_" + output_prefix + ".statsmodels.pickle" - - #call training functions - - train.train_statsmodels(train_df, statsmodels_file) - - train.train_scikit(train_df, scikit_file) - train.train_statsmodels(train_df_reduced, statsmodels_file_reduced) - train.train_scikit(train_df_reduced, scikit_file_reduced) - - train.train_statsmodels(train_df_no_kurtosis, statsmodels_file_no_kurtosis) - train.train_scikit(train_df_no_kurtosis, scikit_file_no_kurtosis) - - train.train_statsmodels(train_df_no_paired_dist, statsmodels_file_no_paired_dist) - train.train_scikit(train_df_no_paired_dist, scikit_file_no_paired_dist) - - train.train_statsmodels(train_df_target_full_reduced, statsmodels_file_full_reduced) - train.train_scikit(train_df_target_full_reduced, scikit_file_full_reduced) - - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument('--demo_model_file',type=str, required=True) - parser.add_argument('--nrep', type=int, required=True) - parser.add_argument('--nref', type=int, required=True) - parser.add_argument('--ntgt',type=int, required=True) - parser.add_argument('--ref_id',type=str, required=True) - parser.add_argument('--tgt_id', type=str, 
required=True) - parser.add_argument('--src_id', type=str, required=True) - parser.add_argument('--seq_len', type=int, required=True) - parser.add_argument('--mut_rate',type=float, required=True) - parser.add_argument('--rec_rate',type=float, required=True) - parser.add_argument('--thread',type=int, required=True) - parser.add_argument('--output_prefix',type=str, required=True) - parser.add_argument('--output_dir',type=str, required=True) - parser.add_argument('--seed',required=True) - - parser.add_argument('--folder_partitions',type=int,required=True) - - args = parser.parse_args() - demo_model_file = args.demo_model_file - nrep = args.nrep - nref = args.nref - ntgt = args.ntgt - ref_id = args.ref_id - tgt_id = args.tgt_id - src_id = args.src_id - seq_len = args.seq_len - mut_rate = args.mut_rate - rec_rate = args.rec_rate - thread = args.thread - output_prefix = args.output_prefix - output_dir = args.output_dir - if args.seed == "None": - seed = None - else: - seed = int(args.seed) - - folder_partitions = args.folder_partitions - - - main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed, folder_partitions) diff --git a/sstar/archie_train.py b/sstar/archie_train.py index d2fbb1d..517bbdb 100644 --- a/sstar/archie_train.py +++ b/sstar/archie_train.py @@ -17,94 +17,73 @@ def main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed, folder_partitions, create_testdirs = False): - + #this variable determines how many prediction directories are created within one folder nrep_per_folder = int(nrep / folder_partitions) - train_df_list = [] for i in range(folder_partitions): curr_output_dir = output_dir + str(i) preprocess.store_global_parameters(demo_model_file, nrep_per_folder, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, curr_output_dir) + #create a folder for the training set if not os.path.exists(curr_output_dir): os.makedirs(curr_output_dir) - #and for the test set - if create_testdirs == True: - if not os.path.exists(curr_output_dir + "test"): - os.makedirs(curr_output_dir + "test") - train._simulation_manager(demo_model_file, nrep_per_folder, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, curr_output_dir, seed) - - new_train_df = train._train_archie_return_df(demo_model_file, nrep_per_folder, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, curr_output_dir, drop_dynamic_cols=False) + new_train_df = train._train_archie(demo_model_file, nrep_per_folder, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, curr_output_dir, drop_dynamic_cols=False) train_df_list.append(new_train_df) - + #after appending the features to the dataframe, the folder containing the training examples is deleted shutil.rmtree(curr_output_dir) + #the full train dataframe train_df = pd.concat(train_df_list) - #create also reduced dfs - - #drop_dynamic_cols indicate whether non-fixed size features should be dropped + #create reduced dfs for training on reduced data sets train_df_reduced = train_df.copy() - train_df_no_kurtosis = train_df.copy() train_df_no_paired_dist = train_df.copy() - train_df_target_full_reduced = train_df.copy() - - + #drop_dynamic_cols indicates whether non-fixed-size features should be dropped dynamic_cols = [col for col in train_df.columns if ('-ton' in col or 
col.startswith("pairwised_dist"))] - no_kurtosis_cols = [col for col in train_df.columns if ('kurtosis_pairwised_dist' in col or col.startswith("pairwised_dist")) ] - no_paired_cols = [col for col in train_df.columns if (col.startswith("pairwised_dist"))] - full_reduced_cols = [col for col in train_df.columns if ('-ton' in col or 'pairwised_dist' in col )] train_df_reduced.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - train_df_no_kurtosis.drop(no_kurtosis_cols, axis=1, inplace = True, errors='ignore') train_df_no_paired_dist.drop(no_paired_cols, axis=1, inplace = True, errors='ignore') train_df_target_full_reduced.drop(full_reduced_cols, axis=1, inplace = True, errors='ignore') - - + #reduced dataframes train_df.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile.csv") - train_df_reduced.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_fixed.csv") - train_df_no_kurtosis.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_nokurtosis.csv") train_df_no_paired_dist.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_nopaired.csv") train_df_target_full_reduced.to_csv(str(demo_model_file) + "_nref" + str(nref) + "_ntgt" + str(ntgt) + "_finalfeaturefile_tgtfullreduced.csv") - + #names for models scikit_file = output_prefix + ".scikit.pickle" statsmodels_file = output_prefix + ".statsmodels.pickle" scikit_file_reduced = "fixed_" + output_prefix + ".scikit.pickle" statsmodels_file_reduced = "fixed_" + output_prefix + ".statsmodels.pickle" - scikit_file_no_kurtosis = "nokurt_" + output_prefix + ".scikit.pickle" statsmodels_file_no_kurtosis = "nokurt_" + output_prefix + ".statsmodels.pickle" - scikit_file_no_paired_dist = "nopaired_" + output_prefix + ".scikit.pickle" statsmodels_file_no_paired_dist = "nopaired_" + output_prefix + ".statsmodels.pickle" - scikit_file_full_reduced = "fullreduced_" + output_prefix + ".scikit.pickle" statsmodels_file_full_reduced = "fullreduced_" + output_prefix + ".statsmodels.pickle" - scikit_file_reduced = "fixed_" + output_prefix + ".scikit.pickle" statsmodels_file_reduced = "fixed_" + output_prefix + ".statsmodels.pickle" #call training functions train.train_statsmodels(train_df, statsmodels_file) - train.train_scikit(train_df, scikit_file) + train.train_statsmodels(train_df_reduced, statsmodels_file_reduced) train.train_scikit(train_df_reduced, scikit_file_reduced) @@ -160,5 +139,4 @@ def main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut folder_partitions = args.folder_partitions - main(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed, folder_partitions) diff --git a/sstar/infer.py b/sstar/infer.py index c4a4e42..a81b720 100644 --- a/sstar/infer.py +++ b/sstar/infer.py @@ -11,8 +11,43 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt +from concurrent.futures import ProcessPoolExecutor as Pool + +global nrep +global nref +global ntgt +global seq_len +global archaic_prop +global not_archaic_prop +global thread +global output_prefix +global output_dirs +global evaluate +global ref_ind_file +global tgt_ind_file +global anc_allele_file +global win_len +global win_step +global match_bonus +global max_mismatch +global mismatch_penalty +global process_archie +global discard_ambiguous + def get_all_folders(output_dir, ref_tgt_folder): + """ + Description: + get the 
name of all directories containing files for prediction (assuming the directory structure from sstar-analysis) + + Arguments: + output_dir str: folder in which vcf-files are stored, usually model-name + ref_tgt_folder str: subfolder in which vcf-files are stored, usually number of ref and tgt individuals + + Returns: + rep_folders list: contains all folders with vcf-files for prediction + """ + res_dir = os.path.join("results", "simulated_data", output_dir, ref_tgt_folder) rep_folders = [] for replicate, folder in enumerate(os.listdir(res_dir)): @@ -20,1073 +55,196 @@ def get_all_folders(output_dir, ref_tgt_folder): return rep_folders -def simulate_predict_introgression(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, statsmodel=None, scikitmodel=None): - output_dir = output_dir + "test" - output_prefix = output_prefix + "test" - create_testdata(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir) - - predict_introgression(seq_len, thread,output_prefix, output_dir, statsmodel, scikitmodel) - - -def predict_introgression(seq_len, thread, output_prefix, output_dir, statsmodel=None, scikitmodel=None, drop_dynamic_cols=True, evaluate=True, simulated=True, average_for_inference=False, compute_cutoffs=True, ref_ind_file=None, tgt_ind_file=None, compute_statsmodel =False, plot_curves = False): - #set filenames for individuals, reference and target (needed for prprocess.process_data) - - if ref_ind_file == None: - ref_ind_file = "new_sim.ref.ind.list" - if tgt_ind_file == None: - tgt_ind_file = "new_sim.tgt.ind.list" - - - anc_allele_file = None - - #set window length and stepsize - win_len = 50000 - win_step = 10000 - - #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data - match_bonus = 1 - max_mismatch = 1 - mismatch_penalty = 1 - - process_archie = True - - #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) - archaic_prop = 0.7 - not_archaic_prop = 0.3 - - - true_tracts = [] - true_tracts_infer = [] - - features = [] - file_names = [] - replicate_counter = 0 - - for replicate, file in enumerate(os.listdir(output_dir)): - if file.endswith(".vcf") or file.endswith("biallelic.vcf.gz"): - #if file.endswith(".vcf") or (file.endswith("vcf.gz") and "biallelic" not in file): - - filename = os.path.splitext(file)[0] - feature_file = os.path.splitext(file)[0]+'.features' - - #computing of statistics etc. 
- preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - #load true tracts - if file.endswith(".vcf"): - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - else: - true_tract = file.split(".")[0]+'.introgressed.tracts.bed' - - true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - - add_ind = None - if true_tract_data is not None: - true_tract_data["rep"] = replicate_counter - if file.endswith(".vcf"): - true_tract_data["hap"] = true_tract_data["hap"].str.replace("hap_", "") - else: - - add_ind = "tsk_10" - true_tract_data["hap"] = 1 - true_tract_data["ind"] = add_ind - - - true_tracts_infer.append ( true_tract_data ) - - #label the true tracts according to the function from train - true_tract_labeled = train._label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len, add_ind) - #true_tract_labeled = train._label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - true_tracts.append(true_tract_labeled) - - #load the feature files with statistics created before - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - features.append(feature) - file_names.append(filename) - replicate_counter = replicate_counter + 1 - - - feature_df_labeleds = [] - - for i, feature_df in enumerate(features): - - if compute_cutoffs == True: - #label function from train is used - feature_df_labeled = train.label_feature_df(feature_df, true_tracts_infer[i], discard_ambiguous=False, replicates=True) - #feature_df_labeled = train.label_feature_df_archie(feature_df, true_tracts_infer[i], discard_ambiguous=False, replicates=True) - - else: - - feature_df_labeled = label_feature_df_archie_infer_only_label(feature_df, true_tracts[i], discard_ambiguous=False) - - - if not os.path.exists(os.path.join(output_dir, "feature_dfs")): - os.makedirs(os.path.join(output_dir, "feature_dfs")) - - feature_df_labeled.to_csv(os.path.join(output_dir, "feature_dfs", "features" + str(file_names[i]) + ".csv") ) - - feature_df_labeleds.append(feature_df_labeled) - - #create one big test dataframe - - test_df = pd.concat(feature_df_labeleds) - - #drop_dynamic_cols indicates whether all non-fixed size features should be dropped - if drop_dynamic_cols == True: - dynamic_cols = [col for col in test_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - - test_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - #in the case of simulated data, we know the labels, but for inference we remove them for the test dataframe - if simulated == True and average_for_inference == False: - y_true = test_df["label"] - test_df.drop(["label"], axis=1, inplace=True, errors='ignore') - - - - #statsmodel inference - if statsmodel != None and compute_statsmodel==True: - test_df_pred = test_df.copy() - model_statsmodel = load_statsmodel(statsmodel) - - #only compute probabilities - if compute_cutoffs == False: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - test_df_pred["probabilities"] = y_prob - 
test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions) - - - #statsmodel inference - if statsmodel != None and compute_statsmodel==True: - test_df_pred = test_df.copy() - model_statsmodel = load_statsmodel(statsmodel) - - #only compute probabilities - if compute_cutoffs == False: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - test_df_pred["probabilities"] = y_prob - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_unphased(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / Statsmodel unphased") - - - #scikit inference - if scikitmodel != None: - test_df_pred = test_df.copy() - model_scikit = load_scikit(scikitmodel) - - if compute_cutoffs == False: - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit) - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - test_df_pred.drop(["probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') - y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed - #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - #create a list of dataframes, each dataframe corresponds to one individual / one haplotype / one replicate - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions) - - #scikit_precision_recall(df_final, plot_label="scikitmodel") - - if evaluate == True: - test_df_pred = test_df.copy() - evaluate_scikit(test_df_pred, y_true, model_scikit) - - - - #scikit inference - if scikitmodel != None: - test_df_pred = test_df.copy() - model_scikit = load_scikit(scikitmodel) - - if compute_cutoffs 
== False: - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit) - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - test_df_pred.drop(["probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') - y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed - #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - #create a list of dataframes, each dataframe corresponds to one individual / one haplotype / one replicate - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_unphased(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit unphased") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") - - #if evaluate == True: - # evaluate_scikit(test_df_pred, y_true, model_scikit) - -def predict_introgression_folders_wo_parallel(seq_len, thread, output_prefix, output_dirs, statsmodel=None, scikitmodel=None, drop_dynamic_cols=True, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, ref_ind_file=None, tgt_ind_file=None, model_name="archie", sample_name="sample1", compute_statsmodel = False, plot_curves=False): - #set filenames for individuals, reference and target (needed for prprocess.process_data) - - if ref_ind_file == None: - ref_ind_file = "new_sim.ref.ind.list" - if tgt_ind_file == None: - tgt_ind_file = "new_sim.tgt.ind.list" - - - anc_allele_file = None - - #set window length and stepsize - win_len = 50000 - win_step = 10000 - - #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data - match_bonus = 1 - max_mismatch = 1 - mismatch_penalty = 1 - - process_archie = True - - #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) - archaic_prop = 0.7 - not_archaic_prop = 0.3 - - - true_tracts = [] - true_tracts_infer = [] - - features = [] - file_names = [] - replicate_counter = 0 - - feature_df_labeleds = [] - - for output_dir in output_dirs: - if os.path.isdir(output_dir): - for replicate, file in enumerate(os.listdir(output_dir)): - - if file.endswith(".vcf") or file.endswith("biallelic.vcf.gz"): - #if file.endswith(".vcf") or (file.endswith("vcf.gz") and "biallelic" not in file): - - filename = os.path.splitext(file)[0] - feature_file = os.path.splitext(file)[0]+'.features' - - #computing of statistics etc. 
- preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - #load true tracts - if file.endswith(".vcf"): - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - else: - true_tract = file.split(".")[0]+'.introgressed.tracts.bed' - - true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - - - add_ind = None - if true_tract_data is not None: - true_tract_data["rep"] = replicate_counter - if file.endswith(".vcf"): - true_tract_data["hap"] = true_tract_data["hap"].str.replace("hap_", "") - else: - - add_ind = "tsk_10" - true_tract_data["hap"] = 1 - true_tract_data["ind"] = add_ind - - - true_tracts_infer.append ( true_tract_data ) - - #label the true tracts according to the function from train - true_tract_labeled = train._label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len, add_ind) - - #label the true tracts according to the function from train - #true_tract_labeled = train._label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - true_tracts.append(true_tract_labeled) - - #load the feature files with statistics created before - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - features.append(feature) - file_names.append(filename) - - - - #for i, feature_df in enumerate(features): - - if evaluate == True: - #label function from train is used - feature_df_labeled = train.label_feature_df(feature, true_tract_data, discard_ambiguous=False, replicates=True) - #feature_df_labeled = train.label_feature_df_archie(feature_df, true_tracts_infer[i], discard_ambiguous=False, replicates=True) - - else: - - feature_df_labeled = label_feature_df_archie_infer_only_label(feature, true_tract_labeled, discard_ambiguous=False) - - - if not os.path.exists(os.path.join(output_dir, "feature_dfs")): - os.makedirs(os.path.join(output_dir, "feature_dfs")) - - feature_df_labeled.to_csv(os.path.join(output_dir, "feature_dfs", "features" + str(filename) + ".csv") ) - - feature_df_labeleds.append(feature_df_labeled) - - - replicate_counter = replicate_counter + 1 - - - - #create one big test dataframe - test_df = pd.concat(feature_df_labeleds) - - #drop_dynamic_cols indicates whether all non-fixed size features should be dropped - if drop_dynamic_cols == True: - dynamic_cols = [col for col in test_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - test_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - #in the case of simulated data, we know the labels, but for inference we remove them for the test dataframe - if simulated == True and average_for_inference == False: - y_true = test_df["label"] - test_df.drop(["label"], axis=1, inplace=True, errors='ignore') - - - - #statsmodel inference - if statsmodel != None and compute_statsmodel==True: - test_df_pred = test_df.copy() - model_statsmodel = load_statsmodel(statsmodel) - - #only compute probabilities - if compute_cutoffs == False: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - 
test_df_pred["probabilities"] = y_prob - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_wo_parallel(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions) - - - #statsmodel inference - if statsmodel != None and compute_statsmodel==True: - test_df_pred = test_df.copy() - model_statsmodel = load_statsmodel(statsmodel) - - #only compute probabilities - if compute_cutoffs == False: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - test_df_pred["probabilities"] = y_prob - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_unphased_wo_parallel(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / Statsmodel unphased") - - - #scikit inference - if scikitmodel != None: - test_df_pred = test_df.copy() - model_scikit = load_scikit(scikitmodel) - - if compute_cutoffs == False: - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit) - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - test_df_pred.drop(["probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') - y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed - #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - #create a list of dataframes, each dataframe corresponds to one individual / one haplotype / one replicate - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_wo_parallel(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit phased") - - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, "archie_1src_accuracy.txt") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") - - 
if evaluate == True: - test_df_pred = test_df.copy() - evaluate_scikit(test_df_pred, y_true, model_scikit) - - - - #scikit inference - if scikitmodel != None: - test_df_pred = test_df.copy() - model_scikit = load_scikit(scikitmodel) - - if compute_cutoffs == False: - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit) - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - test_df_pred.drop(["probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') - y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed - #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - #create a list of dataframes, each dataframe corresponds to one individual / one haplotype / one replicate - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_unphased_wo_parallel(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit unphased") - - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, "archie_unphased_1src_accuracy.txt") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") - - #if evaluate == True: - # evaluate_scikit(test_df_pred, y_true, model_scikit) - - return test_df - - - -global seq_len -global archaic_prop -global not_archaic_prop -global thread -global output_prefix -global output_dirs -global evaluate -global ref_ind_file -global tgt_ind_file -global anc_allele_file -global win_len -global win_step -global match_bonus -global max_mismatch -global mismatch_penalty -global process_archie - -global discard_ambiguous -#def store_global_infer(seq_len, archaic_prop, not_archaic_prop, thread, output_prefix, output_dirs, evaluate, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, match_bonus, max_mismatch, mismatch_penalty, process_archie): - -from concurrent.futures import ProcessPoolExecutor as Pool -def store_global_infer( seq_len_new, archaic_prop_new, not_archaic_prop_new, thread_new, output_prefix_new, output_dirs_new, evaluate_new, ref_ind_file_new, tgt_ind_file_new, anc_allele_file_new, win_len_new, win_step_new, match_bonus_new, max_mismatch_new, mismatch_penalty_new, discard_ambiguous_new, process_archie_new): - ''' - for using the pool for parallelization, the variables have to be stored globally - ''' - global seq_len - global archaic_prop - global not_archaic_prop - global thread - global output_prefix - global output_dirs - global evaluate - global ref_ind_file - global tgt_ind_file - global anc_allele_file - global win_len - global win_step - global match_bonus - global max_mismatch - global mismatch_penalty - global process_archie - - global discard_ambiguous - - seq_len = seq_len_new - archaic_prop = archaic_prop_new - not_archaic_prop = not_archaic_prop_new - thread 
= thread_new - output_prefix = output_prefix_new - output_dirs = output_dirs_new - evaluate = evaluate_new - ref_ind_file = ref_ind_file_new - tgt_ind_file = tgt_ind_file_new - anc_allele_file = anc_allele_file_new - win_len = win_len_new - win_step = win_step_new - match_bonus = match_bonus_new - max_mismatch = max_mismatch_new - mismatch_penalty = mismatch_penalty_new - process_archie = process_archie_new - - discard_ambiguous = discard_ambiguous_new - - - -def infer_parallel_process_label_data_sm(output_tuples): - ''' - this function preprocesses the data for inference (i.e. calculating statistics) - it is called via the pool parallelization process - ''' - global seq_len - global archaic_prop - global not_archaic_prop - global thread - global output_prefix - global output_dirs - global evaluate - global ref_ind_file - global tgt_ind_file - global anc_allele_file - global win_len - global win_step - global match_bonus - global max_mismatch - global mismatch_penalty - global process_archie - - - file = output_tuples[1] - replicate_counter = output_tuples[0] - output_dir = output_tuples[2] - - - #not necessary anymore - #filename = os.path.splitext(file)[0] - - #true_tracts_infer = [] - feature_file = os.path.splitext(file)[0]+'.features' - #computation of statistics - preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - - if file.endswith(".vcf"): - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - else: - true_tract = file.split(".")[0]+'.introgressed.tracts.bed' - - #reading not necessary - #true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - #for infer necessary - true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - - add_ind = None - if true_tract_data is not None: - true_tract_data["rep"] = replicate_counter - if file.endswith(".vcf"): - true_tract_data["hap"] = true_tract_data["hap"].str.replace("hap_", "") - else: - - add_ind = "tsk_10" - true_tract_data["hap"] = 1 - true_tract_data["ind"] = add_ind - - - #labeling of true tracts - true_tract_labeled = train._label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len, add_ind) - - #label the true tracts according to the function from train - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - - #load the feature files with statistics created before - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - - if evaluate == True: - #label function from train is used - feature_df_labeled = train.label_feature_df(feature, true_tract_data, only_above_threshold=True, discard_ambiguous=False, replicates=True) - #feature_df_labeled = train.label_feature_df_archie(feature_df, true_tracts_infer[i], discard_ambiguous=False, replicates=True) - - else: - - feature_df_labeled = label_feature_df_archie_infer_only_label(feature, true_tract_labeled, discard_ambiguous=False) - - - if not os.path.exists(os.path.join(output_dir, "feature_dfs")): - os.makedirs(os.path.join(output_dir, "feature_dfs")) - - return feature_df_labeled, true_tract_data - - -def predict_introgression_folders(nrep, nref, ntgt, seq_len, thread, output_prefix, output_dirs, statsmodel=None, scikitmodel=None, 
drop_dynamic_cols=True, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, ref_ind_file=None, tgt_ind_file=None, model_name="archie", sample_name="sample1", compute_statsmodel = False, plot_curves=False, win_step_50k=False, discard_ambiguous=False, drop_kurtosis=False, drop_paired=False, drop_almost_all=False, use_haplotype_acc=False): - #set filenames for individuals, reference and target (needed for prprocess.process_data) - - if ref_ind_file == None: - ref_ind_file = str(model_name) + "_new_infer" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" - tgt_ind_file = str(model_name) + "_new_infer" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" - train.create_ref_tgt_file(nref, ntgt, ref_ind_file, tgt_ind_file) - - - anc_allele_file = None - - #set window length and stepsize - - win_len = 50000 - if win_step_50k == True: - win_step = 50000 - else: - win_step = 10000 - - #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data - match_bonus = 1 - max_mismatch = 1 - mismatch_penalty = 1 - - process_archie = True - - #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) - archaic_prop = 0.7 - not_archaic_prop = 0.3 - - - true_tracts = [] - - global true_tracts_infer - true_tracts_infer = [] - - features = [] - file_names = [] - replicate_counter = 0 - - feature_df_labeleds = [] - - #new parallel part - store_global_infer(seq_len, archaic_prop, not_archaic_prop, thread, output_prefix, output_dirs, evaluate, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, match_bonus, max_mismatch, mismatch_penalty, discard_ambiguous, process_archie) - - - output_tuples = [] - - replicate_counter = 0 - for output_dir in output_dirs: - if os.path.isdir(output_dir): - for replicate, file in enumerate(os.listdir(output_dir)): - if file.endswith(".vcf") or file.endswith("biallelic.vcf.gz"): - output_tuples.append((replicate_counter, file, output_dir)) - replicate_counter = replicate_counter + 1 - - - feature_df_labeleds = [] - pool = Pool() - - feature_df_labeleds, true_tracts_infer = zip(* pool.map(infer_parallel_process_label_data_sm, output_tuples) ) - - #create one big test dataframe - test_df = pd.concat(feature_df_labeleds) - - true_tracts_infer =list(true_tracts_infer) - - #drop_dynamic_cols indicates whether all non-fixed size features should be dropped - if drop_dynamic_cols == True: - dynamic_cols = [col for col in test_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - test_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - - #also for the other 'reduced forms' - - if drop_kurtosis == True: - dynamic_cols = [col for col in test_df.columns if ('kurtosis_pairwised_dist' in col or col.startswith("pairwised_dist")) ] - test_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - if drop_paired == True: - dynamic_cols = [col for col in test_df.columns if (col.startswith("pairwised_dist"))] - test_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - if drop_almost_all == True: - dynamic_cols = [col for col in test_df.columns if ('-ton' in col or 'pairwised_dist' in col )] - test_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - #in the case of simulated data, we know the labels, but for inference we remove them for the test dataframe - if simulated == True and average_for_inference == False: - y_true = test_df["label"] - test_df.drop(["label"], axis=1, inplace=True, 
errors='ignore') - - global new_dfs - - #statsmodel inference - if statsmodel != None and compute_statsmodel==True: - test_df_pred = test_df.copy() - model_statsmodel = load_statsmodel(statsmodel) - - #only compute probabilities - if compute_cutoffs == False: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - test_df_pred["probabilities"] = y_prob - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions) - - - #statsmodel inference - if statsmodel != None and compute_statsmodel==True: - test_df_pred = test_df.copy() - model_statsmodel = load_statsmodel(statsmodel) - - #only compute probabilities - if compute_cutoffs == False: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) - - test_df_pred["probabilities"] = y_prob - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_unphased(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / Statsmodel unphased") - - - #scikit inference - if scikitmodel != None: - test_df_pred = test_df.copy() - model_scikit = load_scikit(scikitmodel) - - if compute_cutoffs == False: - pass - ''' - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.txt", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) - ''' - - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - test_df_pred.drop(["probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') - y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed - #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] - - df_final = test_df_pred - - #create a list of dataframes, each dataframe corresponds to one individual / one haplotype / one replicate - - new_dfs = split_dfs_sample_replicate(df_final) - - 
precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - - #new parallel - pool = Pool() - - if use_haplotype_acc == False: - precisions, recalls = zip(* pool.map(cal_accuracy_v2, cut_offs) ) - else: - precisions, recalls = zip(* pool.map(cal_accuracy_haplotypes_phased, cut_offs) ) - - ''' - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - ''' - - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit phased") - +def store_global_infer(nrep_new, nref_new, ntgt_new, seq_len_new, archaic_prop_new, not_archaic_prop_new, thread_new, output_prefix_new, output_dirs_new, evaluate_new, ref_ind_file_new, tgt_ind_file_new, anc_allele_file_new, win_len_new, win_step_new, match_bonus_new, max_mismatch_new, mismatch_penalty_new, discard_ambiguous_new, process_archie_new): + ''' + for using the pool for parallelization, the variables have to be stored globally + ''' + global nrep + global nref + global ntgt + global seq_len + global archaic_prop + global not_archaic_prop + global thread + global output_prefix + global output_dirs + global evaluate + global ref_ind_file + global tgt_ind_file + global anc_allele_file + global win_len + global win_step + global match_bonus + global max_mismatch + global mismatch_penalty + global process_archie + global discard_ambiguous - if use_haplotype_acc == False: - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + "archie_1src_accuracy.txt") - else: - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + "archie_1src_accuracy_haplotypes.txt") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") + nrep = nrep_new + nref = nref_new + ntgt = ntgt_new + seq_len = seq_len_new + archaic_prop = archaic_prop_new + not_archaic_prop = not_archaic_prop_new + thread = thread_new + output_prefix = output_prefix_new + output_dirs = output_dirs_new + evaluate = evaluate_new + ref_ind_file = ref_ind_file_new + tgt_ind_file = tgt_ind_file_new + anc_allele_file = anc_allele_file_new + win_len = win_len_new + win_step = win_step_new + match_bonus = match_bonus_new + max_mismatch = max_mismatch_new + mismatch_penalty = mismatch_penalty_new + process_archie = process_archie_new + discard_ambiguous = discard_ambiguous_new - ''' - if evaluate == True: - test_df_pred = test_df.copy() - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.txt", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) - ''' +def infer_parallel_process_label_data_sm(output_tuples): + ''' + this function preprocesses the data for inference (i.e. 
calculating statistics) + it is called via the pool parallelization process + ''' + global nrep + global nref + global ntgt + global seq_len + global archaic_prop + global not_archaic_prop + global thread + global output_prefix + global output_dirs + global evaluate + global ref_ind_file + global tgt_ind_file + global anc_allele_file + global win_len + global win_step + global match_bonus + global max_mismatch + global mismatch_penalty + global process_archie + + replicate_counter = output_tuples[0] + file = output_tuples[1] + output_dir = output_tuples[2] - #scikit inference - if scikitmodel != None: - test_df_pred = test_df.copy() - model_scikit = load_scikit(scikitmodel) + feature_file = os.path.splitext(file)[0]+'.features' - if compute_cutoffs == False: - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.png", model_name + ", rep: " + str(nrep) + ", nref: " + str(nref) + ", ntgt: " + str(ntgt)) - #create cut-offs and compute precision-recall curve as in sstar-analysis - else: - test_df_pred.drop(["probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') - y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed - #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) - test_df_pred["tractstart"] = test_df_pred["start"] - test_df_pred["tractend"] = test_df_pred["end"] + #computation of statistics + preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - df_final = test_df_pred - - #create a list of dataframes, each dataframe corresponds to one individual / one haplotype / one replicate - new_dfs = split_dfs_sample_replicate(df_final) - - precisions = [] - recalls = [] - cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] + if file.endswith(".vcf"): + true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' + else: + #this is only used for the old data and could finally be removed + true_tract = file.split(".")[0]+'.introgressed.tracts.bed' + - #new parallel - pool = Pool() - + #reading the true tract file is necessary for inference + true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - if use_haplotype_acc == False: - precisions, recalls = zip(* pool.map(cal_accuracy_v2_unphased, cut_offs) ) - else: - precisions, recalls = zip(* pool.map(cal_accuracy_haplotypes_unphased, cut_offs) ) + add_ind = None + if true_tract_data is not None: + true_tract_data["rep"] = replicate_counter + if file.endswith(".vcf"): + true_tract_data["hap"] = true_tract_data["hap"].str.replace("hap_", "") + else: + #if the old data is used (and so the '.introgressed.tracts.bed' files), the name of the target individual is "tsk_" + number_of_reference_individuals + #this is only used for the old data and could finally be removed + add_ind = "tsk_" + str(nref) + true_tract_data["hap"] = 1 + true_tract_data["ind"] = add_ind #labeling of true tracts + true_tract_labeled = 
train._label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len, add_ind) - ''' - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2_unphased(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - ''' - if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit unphased") + #label the true tracts according to the function from train + if true_tract_labeled is not None: + true_tract_labeled["rep"] = replicate_counter - if use_haplotype_acc == False: - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + "archie_unphased_1src_accuracy.txt") - else: - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + "archie_unphased_1src_accuracy_haplotypes.txt") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") + #load the feature files with statistics created before + feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") + feature["rep"] = replicate_counter - if evaluate == True: - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.png", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) - return test_df + if evaluate == True: + #label function from train is used + feature_df_labeled = train.label_feature_df(feature, true_tract_data, only_above_threshold=True, discard_ambiguous=False, replicates=True) + else: + #when true/inferred tracts are compared, labeling is not necessary at this stage + feature_df_labeled = feature + if not os.path.exists(os.path.join(output_dir, "feature_dfs")): + os.makedirs(os.path.join(output_dir, "feature_dfs")) + return feature_df_labeled, true_tract_data +def predict_introgression_folders(nrep, nref, ntgt, seq_len, thread, output_prefix, output_dirs, statsmodel=None, scikitmodel=None, evaluate=False, simulated=True, compute_cutoffs=True, ref_ind_file=None, tgt_ind_file=None, training_name="", model_name="archie", sample_name="sample1", compute_statsmodel = False, plot_curves=False, win_step_50k=50000, discard_ambiguous=False,use_haplotype_acc=False): + """ + Description: + compute prdictions for files in all subdirectories of the path indicated by output_dirs -def predict_introgression_folders_allmodeltypes(nrep, nref, ntgt, seq_len, thread, output_prefix, output_dirs, statsmodel=None, scikitmodel=None, drop_dynamic_cols=True, evaluate=False, simulated=True, average_for_inference=False, compute_cutoffs=True, ref_ind_file=None, tgt_ind_file=None, model_name="archie", sample_name="sample1", compute_statsmodel = False, plot_curves=False, win_step_50k=False, discard_ambiguous=False, drop_kurtosis=True, drop_paired=True, drop_almost_all=True): - #set filenames for individuals, reference and target (needed for prprocess.process_data) + Arguments: + nrep int: number of replicates + nref int: number of reference individuals + ntgt int: number of target individuals + seq_len int: sequence length + output_prefix str: string used to select model + output_dirs str: indicates folder with subdirectories containing files for prediction + statsmodel str: name of statsmodel model file + scikitmodel str: name of scikit model file + evaluate bool: if True, precision-recall curves using scikit are computed on a window-level-basis + simulated bool: if True, the data already contains a 'label' column + compute_cutoffs bool: if 
True, compute cutoffs for inferred/true tracts (similar to the function for sstar from sstar-analysis) + ref_ind_file str: name of file containing reference individuals (if None, a new one is created according to tskit-conventions) + tgt_ind_file str: name of file containing target individuals + training_name str: arbitrary name for output files + model_name str: arbitrary name for output files + sample_name str: arbitrary name for output files + compute_statsmodel = False + plot_curves bool: if True, the precision-recall curves are plotted using matplotlib + win_step_50k int: length of window stepsize + discard_ambiguous bool: if True, ambiguous windows are discarded (i.e. not added to test dataframe and not used for computing cut-offs etc.) + use_haplotype_acc bool: if True, accuracy computation for the cut-offs is done on a window/haplotype-level-basis (i.e. not inferred/true tracts are compared, but a window is either introgressed or not) + Returns: + test_df DataFrame: contains all windows and corresponding information (statistics, label,...) + """ + + #set filenames for individuals, reference and target (needed for preprocess.process_data) if ref_ind_file == None: ref_ind_file = str(model_name) + "_new_infer" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" tgt_ind_file = str(model_name) + "_new_infer" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" train.create_ref_tgt_file(nref, ntgt, ref_ind_file, tgt_ind_file) - anc_allele_file = None #set window length and stepsize - win_len = 50000 - if win_step_50k == True: - win_step = 50000 - else: - win_step = 10000 - + win_step = win_step_50k + #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data match_bonus = 1 max_mismatch = 1 mismatch_penalty = 1 - process_archie = True #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) archaic_prop = 0.7 not_archaic_prop = 0.3 - - - true_tracts = [] + #true tracts have to be defined globally so that the can be accessed in the paralellization pool global true_tracts_infer true_tracts_infer = [] - - features = [] - file_names = [] - replicate_counter = 0 - feature_df_labeleds = [] #new parallel part - store_global_infer(seq_len, archaic_prop, not_archaic_prop, thread, output_prefix, output_dirs, evaluate, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, match_bonus, max_mismatch, mismatch_penalty, discard_ambiguous, process_archie) - + store_global_infer(nrep, nref, ntgt,seq_len, archaic_prop, not_archaic_prop, thread, output_prefix, output_dirs, evaluate, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, match_bonus, max_mismatch, mismatch_penalty, discard_ambiguous, process_archie) output_tuples = [] - replicate_counter = 0 for output_dir in output_dirs: if os.path.isdir(output_dir): @@ -1106,51 +264,63 @@ def predict_introgression_folders_allmodeltypes(nrep, nref, ntgt, seq_len, threa true_tracts_infer =list(true_tracts_infer) + possible_further_columns = ['label', 'overlap', 'overlap_percentage', 'interval', 'overlap_length', 'label_one_1', 'label_one_2', 'label_one_3', 'start', 'end', 'haplo', 'sample', 'rep', 'chrom'] + + #load model files + if scikitmodel != None: + model_scikit = load_scikit(scikitmodel) + scikit_available_features = list(model_scikit.feature_names_in_) + possible_further_columns + test_df = test_df[test_df.columns.intersection(scikit_available_features)] + print("Features taken from scikit model") + elif statsmodel != 
None: + model_statsmodel = load_scikit(scikitmodel) + statsmodels_available_features = list(model_statsmodel.feature_names_in_) + possible_further_columns + test_df = test_df[test_df.columns.intersection(statsmodels_available_features)] + print("Features taken from statsmodels model") + else: + print("No model provided!") + - #in the case of simulated data, we know the labels, but for inference we remove them for the test dataframe - if simulated == True and average_for_inference == False: + #in the case of simulated data, we know the labels (if determined before), but for inference we remove them from the test dataframe + if simulated == True and evaluate == True: y_true = test_df["label"] test_df.drop(["label"], axis=1, inplace=True, errors='ignore') + + #start inference / creation of precision-recall curves + if scikitmodel != None: + if evaluate == True: + scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="") + else: + scikit_full_inference(test_df, None, nrep, nref, ntgt, scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="") + #alternatively, also statsmodel can be used + if compute_statsmodel == True and statsmodel != None: + statsmodel_full_inference(test_df, statsmodel, compute_cutoffs, plot_curves) - scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="") - - - #drop_dynamic_cols indicates whether all non-fixed size features should be dropped - if drop_dynamic_cols == True: - dynamic_cols = [col for col in test_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - test_df_drop_dynamic_cols = test_df.copy() - test_df_drop_dynamic_cols.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - scikit_full_inference(test_df, y_true, nrep, nref, ntgt, "fixed_" + scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="_dropdyn_") - - - #also for the other 'reduced forms' - - if drop_kurtosis == True: - dynamic_cols = [col for col in test_df.columns if ('kurtosis_pairwised_dist' in col or col.startswith("pairwised_dist")) ] - test_df_drop_dynamic_cols = test_df.copy() - test_df_drop_dynamic_cols.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - scikit_full_inference(test_df, y_true, nrep, nref, ntgt, "nokurt_" + scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="_dropkurt_") + return test_df - if drop_paired == True: - dynamic_cols = [col for col in test_df.columns if (col.startswith("pairwised_dist"))] - test_df_drop_dynamic_cols = test_df.copy() - test_df_drop_dynamic_cols.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - scikit_full_inference(test_df, y_true, nrep, nref, ntgt, "nopaired_" + scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="_droppaired_") - if drop_almost_all == True: - dynamic_cols = [col for col in test_df.columns if ('-ton' in col or 'pairwised_dist' in col )] - test_df_drop_dynamic_cols = test_df.copy() - test_df_drop_dynamic_cols.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - scikit_full_inference(test_df, y_true, nrep, nref, ntgt, "fullreduced_" + scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name="_dropalmostall_") - +def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name=""): + """ + Description: + this function creates 
precision-recall-curves using scikit + Arguments: + test_df DataFrame: dataframe containing test data set windows + y_true list: list containing the true labels of test dataframe + nrep int: number of replicates + nref int: number of reference individuals + ntgt int: number of target individuals + scikitmodel str: name of scikit model file + compute_cutoffs bool: if True, compute cutoffs for inferred/true tracts (similar to the function for sstar from sstar-analysis) + plot_curves bool: if True, the precision-recall curves are plotted using matplotlib + training_name str: arbitrary name for output files + model_name str: arbitrary name for output files + sample_name str: arbitrary name for output files + type_name str: arbitrary name for output files + """ -def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, compute_cutoffs, plot_curves, model_name, sample_name, type_name=""): - ''' - this function creates precision-recall-curves using scikit - ''' global true_tracts_infer global new_dfs @@ -1160,13 +330,6 @@ def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, comput if compute_cutoffs == False: pass - ''' - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) - - if evaluate == True: - #create precision-recall curve using all instances from dataframe - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.txt", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) - ''' #create cut-offs and compute precision-recall curve as in sstar-analysis else: @@ -1174,7 +337,6 @@ def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, comput y_pred = infer_scikit(test_df_pred.copy(), model_scikit, probabilities=True) test_df_pred["probabilities"] = y_pred - #new column, unneccessary should be changed #start and end are necessary for comparing true and inferred tracts (see cal_accuracy_v2) test_df_pred["tractstart"] = test_df_pred["start"] test_df_pred["tractend"] = test_df_pred["end"] @@ -1189,31 +351,17 @@ def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, comput recalls = [] cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] - #new parallel + #new parallel Pool pool = Pool() - precisions, recalls = zip(* pool.map(cal_accuracy_v2, cut_offs) ) - ''' - for cut_off in cut_offs: - prec, rec = cal_accuracy_v2(true_tracts_infer, new_dfs, cutoff = cut_off) - precisions.append(prec) - recalls.append(rec) - ''' - + #plot cut-offs if plot_curves == True: plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit phased") + #write cut-offs to file write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + type_name + "archie_1src_accuracy.txt") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") - - ''' - if evaluate == True: - test_df_pred = test_df.copy() - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.txt", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) - ''' #scikit inference @@ -1222,7 +370,7 @@ def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, comput model_scikit = load_scikit(scikitmodel) if compute_cutoffs == False: - y_prob = infer_scikit(test_df_pred.copy(), model_scikit, 
probabilities=True) + if evaluate == True: #create precision-recall curve using all instances from dataframe evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + type_name + "archie_scikit_precrec.png", model_name + ", rep: " + str(nrep) + ", nref: " + str(nref) + ", ntgt: " + str(ntgt)) @@ -1252,34 +400,110 @@ def scikit_full_inference(test_df, y_true, nrep, nref, ntgt, scikitmodel, comput precisions, recalls = zip(* pool.map(cal_accuracy_v2_unphased, cut_offs) ) + #plot cut-offs + if plot_curves == True: + plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit unphased") + #write cut-offs to file + write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + "archie_unphased_1src_accuracy.txt") + + #precision-recall curve on window-level-basis + if evaluate == True: + evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.png", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) + + + +def statsmodel_full_inference(test_df, statsmodel, compute_cutoffs, plot_curves): + """ + Description: + this function creates precision-recall-curves using scikit + + Arguments: + test_df DataFrame: dataframe containing test data set windows + scikitmodel str: name of scikit model file + compute_cutoffs bool: if True, compute cutoffs for inferred/true tracts (similar to the function for sstar from sstar-analysis) + plot_curves bool: if True, the precision-recall curves are plotted using matplotlib + """ + + if statsmodel != None: + test_df_pred = test_df.copy() + model_statsmodel = load_statsmodel(statsmodel) + + #only compute probabilities + if compute_cutoffs == False: + y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) + + #create cut-offs and compute precision-recall curve as in sstar-analysis + else: + y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) + + test_df_pred["probabilities"] = y_prob + test_df_pred["tractstart"] = test_df_pred["start"] + test_df_pred["tractend"] = test_df_pred["end"] + + df_final = test_df_pred + + new_dfs = split_dfs_sample_replicate(df_final) + + precisions = [] + recalls = [] + cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] + for cut_off in cut_offs: + prec, rec = cal_accuracy_v2(true_tracts_infer, new_dfs, cutoff = cut_off) + precisions.append(prec) + recalls.append(rec) + + if plot_curves == True: + plot_cutoffs(recalls, precisions) + + + #statsmodel inference + if statsmodel != None: + test_df_pred = test_df.copy() + model_statsmodel = load_statsmodel(statsmodel) + + #only compute probabilities + if compute_cutoffs == False: + y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) + + #create cut-offs and compute precision-recall curve as in sstar-analysis + else: + y_prob = infer_statsmodel(test_df_pred.copy(), model_statsmodel) + + test_df_pred["probabilities"] = y_prob + test_df_pred["tractstart"] = test_df_pred["start"] + test_df_pred["tractend"] = test_df_pred["end"] - ''' + df_final = test_df_pred + + new_dfs = split_dfs_sample_replicate(df_final) + + precisions = [] + recalls = [] + cut_offs = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 0.99, 0.999, 0.9999] for cut_off in cut_offs: prec, rec = cal_accuracy_v2_unphased(true_tracts_infer, 
new_dfs, cutoff = cut_off) precisions.append(prec) recalls.append(rec) - ''' + if plot_curves == True: - plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / scikit unphased") - - write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, model_name + sample_name + "archie_unphased_1src_accuracy.txt") - - #scikit_precision_recall(df_final, plot_label="scikitmodel") - - if evaluate == True: - evaluate_scikit(test_df_pred, y_true, model_scikit, plot_curves, True, model_name + sample_name + "archie_scikit_precrec.png", model_name + " ,rep: " + str(nrep) + " ,nref: " + str(nref) + " ,ntgt: " + str(ntgt)) - return test_df - + plot_cutoffs(recalls, precisions, title="Precision-Recall curve for computed cutoffs / Statsmodel unphased") +def write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, acc_filename): + """ + Description: + this function writes precision-recall-information to file + Arguments: + cut_offs list: list containing the used cut-ffs + precisions list: list containing the computed precision values + recalls int: list containing the computed recall values + model_name str: name of the model + sample_name str: name of the sample + acc_filename str: arbitrary name for output files + """ -def write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, acc_filename): - ''' - write precision-recall-information to file - ''' - prec_rec_list = [] for i, cut_off in enumerate(cut_offs): prec_rec_list.append([model_name, sample_name, cut_off, precisions[i], recalls[i]]) @@ -1290,9 +514,7 @@ def write_prec_recall_df(cut_offs, precisions, recalls, model_name, sample_name, if not os.path.exists(os.path.join("results", "inference", "archie")): os.makedirs(os.path.join("results", "inference", "archie")) - prec_rec_df.to_csv(os.path.join("results", "inference", "archie",acc_filename), sep="\t", index=False, na_rep="nan") - def plot_cutoffs(recs, precs, title=None): @@ -1323,55 +545,6 @@ def create_testdata_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, s ''' train._simulation_manager_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed) -def compute_average_probabilities_true_tracts(df, probabilities, true_tracts, output_file=None, cut_off=0.5): - #NOT used at the moment - #the idea was to compute the probabilities over one overlap region, probably not necessary - - df["probabilities"] = probabilities - - if output_file is not None: - df.to_csv(output_file) - - inferred_tracts = [] - - for tract_sample in true_tracts: - for entry in tract_sample.values.tolist(): - - start = entry[1] - end = entry[2] - - haplo = int(entry[3]) - ind = entry[4] - - replicate = entry[5] - - inferred_df = df.copy() - - inferred_df = inferred_df[(inferred_df["sample"] == ind) & (inferred_df["haplo"] == haplo) & (inferred_df["rep"] == replicate)] - - - inferred_df.drop(["overlap"], axis=1, inplace=True, errors='ignore') - - inferred_df['overlap2'] = 0 - inferred_df["overlap2"] = inferred_df.apply(lambda row: row.overlap2 + train.getOverlap(int(start),int(end), int(row.start),int(row.end)) , axis=1) - - inferred_df = inferred_df[inferred_df["overlap2"] > 0] - - #average over probabilities - inferred_df = inferred_df.groupby(by=['chrom','haplo', 'sample', 'rep'])['probabilities'].mean().reset_index() - - - inferred_df["tractstart"] = int(start) - inferred_df["tractend"] = int(end) - - inferred_tracts.append(inferred_df.copy()) - - 
inferred_tracts = pd.concat(inferred_tracts) - - inferred_tracts.drop(['overlap_length', 'interval', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3"], axis=1, inplace=True, errors='ignore') - return inferred_tracts - - def load_statsmodel(model_file): ''' @@ -1389,6 +562,7 @@ def load_scikit(model_file): ''' load scikit model ''' + import pickle with open(model_file , 'rb') as f: model_scikit = pickle.load(f) @@ -1397,34 +571,45 @@ def load_scikit(model_file): def infer_statsmodel(X_test, model): ''' - compute probabilities for test set with statsmodels logistic classification + Description: + compute probabilities for test set with statsmodel logistic classification + + Arguments: + X_test DataFrame: Dataframe containing all windows of the test data set + model Scikit Logistic Regression: Scikit model to use for predictions ''' + import statsmodels.api as sm + #replace nan values X_test.replace(np.nan, 0, inplace=True) X_test.replace(pd.NA, 0, inplace=True) + #remove columns unnecessary for prediction X_test.drop(['overlap_length','chrom', 'start', 'end', 'interval', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3"], axis=1, inplace=True, errors='ignore') - X_test.drop(['overlap','sample', 'haplo', 'rep'], axis=1, inplace=True, errors='ignore') - X_test.drop(["label"], axis=1, inplace=True, errors='ignore') - X_test = X_test.astype(float) - - predictions = model.predict(sm.add_constant(X_test, prepend=False)) return predictions -def infer_scikit(X_test, model, evaluate = True, probabilities = False): +def infer_scikit(X_test, model, probabilities = False): ''' - compute probabilities for test set with scikit logistic classification + Description: + compute probabilities for test set with scikit logistic classification + + Arguments: + X_test DataFrame: Dataframe containing all windows of the test data set + model Scikit Logistic Regression: Scikit model to use for predictions + probabilities bool: if True, probabilites are given as output ''' + + #replace nan values X_test.replace(np.nan, 0, inplace=True) X_test.replace(pd.NA, 0, inplace=True) - + #remove columns unnecessary for prediction X_test.drop(['overlap_length','chrom', 'start', 'end', 'interval', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3"], axis=1, inplace=True, errors='ignore') X_test.drop(['overlap','sample', 'haplo', 'rep'], axis=1, inplace=True, errors='ignore') X_test.drop(["label"], axis=1, inplace=True, errors='ignore') @@ -1439,7 +624,12 @@ def infer_scikit(X_test, model, evaluate = True, probabilities = False): def scikit_precision_recall(X_test, plot_label="prediction logistic/scikit"): ''' - given a dataframe with labels and predicted probabilities, create a precision-recall curve via scikit + Description: + given a dataframe with labels and predicted probabilities, create a precision-recall curve via scikit + + Arguments: + X_test DataFrame: Dataframe containing all windows of the test data set + plot_label str: title to appear on precision-recall curve ''' from sklearn.metrics import precision_recall_curve @@ -1463,23 +653,30 @@ def scikit_precision_recall(X_test, plot_label="prediction logistic/scikit"): def evaluate_scikit(X_test, y, model, plot_curves = False, write_model=True, filename=None, textlabel = "prediction logistic/scikit" ): ''' - given a dataframe and a list of the according labels, predict probabilities for the instances of the df and plot a precision-recall curve + Description: + given a test dataframe containing windows, the true labels for 
this dataframe and a scikit-model, create a precision-recall curve via scikit + + Arguments: + X_test DataFrame: Dataframe containing all windows of the test data set + y list: Dataframe containing true labels of all windows in X_test + model Scikit LogisticRegression Model: Scikit model to be used + plot_curves bool: if True, plot the curves using matplotlib + write_model bool: If True, write precision/recall-values to file + filename str: Name of the file to be written (if write_model is True) + textlabel str: title to appear on precision-recall curve ''' + from sklearn.metrics import precision_recall_curve X_test.drop(['haplo', 'rep', 'sample', 'overlap_length','chrom', 'start', 'end', 'interval', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3"], axis=1, inplace=True, errors='ignore') X_test.drop(["overlap", "probabilities", "tractstart", "tractend"], axis=1, inplace=True, errors='ignore') X_test.drop(["label"], axis=1, inplace=True, errors='ignore') - lr_probs = model.predict_proba(X_test) - print(np.unique(y)) - precision, recall, thresholds = precision_recall_curve(y.astype(int), lr_probs[:,1]) - - new_precrec_df = pd.DataFrame([precision, recall, thresholds]) - + thresholds = np.hstack((thresholds, [1])) + new_precrec_df = pd.DataFrame(np.array([precision, recall, thresholds]).T, columns = ["precision", "recall", "threshold"]) plt.plot(recall, precision, marker='.', label=textlabel) plt.xlabel('Recall') @@ -1490,15 +687,15 @@ def evaluate_scikit(X_test, y, model, plot_curves = False, write_model=True, fil if not os.path.exists(os.path.join("results", "inference", "archie")): os.makedirs(os.path.join("results", "inference", "archie")) plt.savefig(os.path.join("results", "inference", "archie",filename)) - #new_precrec_df.to_csv(os.path.join("results", "inference", "archie",filename), sep="\t", index=False, na_rep="nan") + import pickle + + pickle.dump(new_precrec_df, open(os.path.join("results", "inference", "archie",filename+".pickle"), "wb")) if plot_curves == True: plt.show() - - def label_feature_df_archie_infer_only_label(feature_df, true_tract_list, discard_ambiguous=False): """ Description: @@ -1516,7 +713,6 @@ def label_feature_df_archie_infer_only_label(feature_df, true_tract_list, discar if true_tract_list is None: return feature_df - true_tract_list["hap"] = true_tract_list["hap"].astype(str).str.replace("hap_", "") true_tract_list["hap"] = true_tract_list["hap"].astype(int) @@ -1529,12 +725,10 @@ def label_feature_df_archie_infer_only_label(feature_df, true_tract_list, discar replicate = entry[5] conditions = (feature_df["sample"] == ind) & (feature_df["haplo"] == haplo) & (feature_df["rep"] == replicate) - if tract_label[0] == 1: feature_df.loc[conditions, "label"] = 1 - if tract_label[2] == 1: if discard_ambiguous == True: @@ -1552,14 +746,10 @@ def cal_accuracy_v2(cutoff=0.5): """ Description: Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples. + This function does not merge haplotypes, i.e. gives phased results Arguments: - true_tracts list: list of dataframes containing true introgresssed tracts. - inferred_dfs DataFrame: dataframe containing inferred tracts - - Returns: - precision float: Amount of true introgressed tracts detected divided by amount of inferred introgressed tracts. - recall float: Amount ot true introgressed tracts detected divided by amount of true introgressed tracts. 
+ cutoff int: cutoff used for the calculation of precision and recall """ true_tracts = pd.concat(deepcopy(true_tracts_infer)) @@ -1620,7 +810,6 @@ def cal_accuracy_v2(cutoff=0.5): all_true_positives = all_true_positives + total_true_tracts - if float(all_total_inferred_tracts) == 0: precision = np.nan else: precision = all_true_positives / float(all_total_inferred_tracts) * 100 if float(all_total_true_tracts) == 0: recall = np.nan @@ -1636,11 +825,10 @@ def cal_accuracy_v2_unphased(cutoff=0.5): from copy import deepcopy """ Description: - Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples. + Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples (but not over haplotypes which are merged). Arguments: - true_tracts list: list of dataframes containing true introgresssed tracts. - inferred_dfs DataFrame: dataframe containing inferred tracts + cutoff int: cutoff used for the calculation of precision and recall Returns: precision float: Amount of true introgressed tracts detected divided by amount of inferred introgressed tracts. @@ -1662,7 +850,6 @@ def cal_accuracy_v2_unphased(cutoff=0.5): for haplo in sample: haplos.append(pd.DataFrame(haplo)) haplo = pd.concat(haplos) - haplo = pd.DataFrame(haplo) #apply cutoff @@ -1710,7 +897,6 @@ def cal_accuracy_v2_unphased(cutoff=0.5): all_true_positives = all_true_positives + total_true_tracts - if float(all_total_inferred_tracts) == 0: precision = np.nan else: precision = all_true_positives / float(all_total_inferred_tracts) * 100 if float(all_total_true_tracts) == 0: recall = np.nan @@ -1730,8 +916,10 @@ def cal_accuracy_haplotypes_unphased(cutoff=0.5, seqlen = 50000, archaic_prop = Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples. Arguments: - true_tracts list: list of dataframes containing true introgresssed tracts. - inferred_dfs DataFrame: dataframe containing inferred tracts + cutoff int: cutoff used for the calculation of precision and recall + seqlen int: length of the chromosomes in the test data replicates + archaic_prop int: windows with an introgression above this threshold are treated as introgressed + not_archaic_prop int: windows with an introgression below this threshold are treated as non-introgressed Returns: precision float: Amount of true introgressed tracts detected divided by amount of inferred introgressed tracts. 
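# Illustrative sketch (hypothetical names, not taken from sstar): the docstring
# above describes the haplotype-level accuracy mode, in which a haplotype counts
# as truly introgressed when its summed true-tract length is at least
# archaic_prop * seqlen, as non-introgressed when it is at most
# not_archaic_prop * seqlen, and as ambiguous (and therefore discarded) otherwise.
# A minimal, self-contained version of that thresholding, assuming the defaults
# used in this module (seqlen=50000, archaic_prop=0.7, not_archaic_prop=0.3);
# label_haplotype, true_bp_per_hap and called_bp_per_hap are example names
# introduced only for this sketch.
from sklearn.metrics import precision_score, recall_score

def label_haplotype(true_bp, seqlen=50000, archaic_prop=0.7, not_archaic_prop=0.3):
    """Return 1 (introgressed), 0 (not introgressed) or 2 (ambiguous)."""
    if true_bp >= int(archaic_prop * seqlen):
        return 1
    if true_bp <= int(not_archaic_prop * seqlen):
        return 0
    return 2

# toy per-haplotype summaries: summed true-tract length and summed length of
# windows whose predicted probability passes the cutoff (both in bp)
true_bp_per_hap = [45000, 1000, 20000, 40000, 0]
called_bp_per_hap = [40000, 36000, 30000, 45000, 10000]

true_labels, infer_labels = [], []
for true_bp, called_bp in zip(true_bp_per_hap, called_bp_per_hap):
    t = label_haplotype(true_bp)
    if t == 2:  # ambiguous haplotypes are dropped, as described above
        continue
    true_labels.append(t)
    infer_labels.append(1 if called_bp >= int(0.7 * 50000) else 0)

print(precision_score(true_labels, infer_labels))  # 0.67: one non-introgressed haplotype is called introgressed
print(recall_score(true_labels, infer_labels))     # 1.0: both introgressed haplotypes are recovered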
@@ -1740,7 +928,6 @@ def cal_accuracy_haplotypes_unphased(cutoff=0.5, seqlen = 50000, archaic_prop = true_tracts = pd.concat(deepcopy(true_tracts_infer)) - true_label_list = [] infer_label_list = [] @@ -1801,7 +988,6 @@ def cal_accuracy_haplotypes_unphased(cutoff=0.5, seqlen = 50000, archaic_prop = else: infer_label_list.append(2) - true_tracts = true_tracts[true_tracts["processed"] == 0] @@ -1823,8 +1009,6 @@ def cal_accuracy_haplotypes_unphased(cutoff=0.5, seqlen = 50000, archaic_prop = true_label_list.append(1) infer_label_list.append(0) - - true_label_array = np.array(true_label_list) infer_label_array = np.array(infer_label_list) @@ -1834,34 +1018,35 @@ def cal_accuracy_haplotypes_unphased(cutoff=0.5, seqlen = 50000, archaic_prop = precision = precision_score(true_label_array, infer_label_array) recall = recall_score(true_label_array, infer_label_array) - return precision, recall def cal_accuracy_haplotypes_phased(cutoff=0.5, seqlen = 50000, archaic_prop = 0.7, not_archaic_prop = 0.3): - import pybedtools - import numpy as np - import pandas as pd - from copy import deepcopy """ Description: Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples. + This function does not merge haplotypes, i.e. gives phased results Arguments: - true_tracts list: list of dataframes containing true introgresssed tracts. - inferred_dfs DataFrame: dataframe containing inferred tracts + cutoff int: cutoff used for the calculation of precision and recall + seqlen int: length of the chromosomes in the test data replicates + archaic_prop int: windows with an introgression above this threshold are treated as introgressed + not_archaic_prop int: windows with an introgression below this threshold are treated as non-introgressed Returns: precision float: Amount of true introgressed tracts detected divided by amount of inferred introgressed tracts. recall float: Amount ot true introgressed tracts detected divided by amount of true introgressed tracts. """ - true_tracts = pd.concat(deepcopy(true_tracts_infer)) + import pybedtools + import numpy as np + import pandas as pd + from copy import deepcopy + true_tracts = pd.concat(deepcopy(true_tracts_infer)) true_label_list = [] infer_label_list = [] - true_tracts["processed"] = 0 for replicate in new_dfs: @@ -1876,7 +1061,6 @@ def cal_accuracy_haplotypes_phased(cutoff=0.5, seqlen = 50000, archaic_prop = 0. if haplo.shape[0] > 0: - curr_rep = int(haplo["rep"].unique()[0]) curr_sample = haplo["sample"].unique()[0] curr_haplo = int(haplo["haplo"].unique()[0]) @@ -1899,10 +1083,8 @@ def cal_accuracy_haplotypes_phased(cutoff=0.5, seqlen = 50000, archaic_prop = 0. total_true_tracts = sum([x.stop - x.start for x in (truth_tracts)]) true_positives = sum([x.stop - x.start for x in inferred_tracts.intersect(truth_tracts)]) - if (total_true_tracts <= int(not_archaic_prop * seqlen)) or (total_true_tracts >= int(archaic_prop * seqlen)): - if total_true_tracts <= int(not_archaic_prop * seqlen): true_label_list.append(0) @@ -1916,6 +1098,7 @@ def cal_accuracy_haplotypes_phased(cutoff=0.5, seqlen = 50000, archaic_prop = 0. infer_label_list.append(1) else: + #as ambiguous windows are already discarded, this else-clause should never be entered infer_label_list.append(2) @@ -1937,7 +1120,6 @@ def cal_accuracy_haplotypes_phased(cutoff=0.5, seqlen = 50000, archaic_prop = 0. 
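# The tract-based accuracy helpers in this module (cal_accuracy_v2 and its
# unphased variant) reduce to one pybedtools pattern: sort and merge the true
# and the inferred tracts, intersect them, and report precision/recall as
# overlapping base pairs over inferred/true base pairs (in percent). A minimal
# sketch with made-up coordinates; true_df and infer_df are hypothetical example
# inputs, and the column names follow the BED convention used in this module.
import pandas as pd
import pybedtools

true_df = pd.DataFrame({"chrom": ["1", "1"], "start": [10000, 40000], "end": [20000, 45000]})
infer_df = pd.DataFrame({"chrom": ["1", "1"], "start": [12000, 30000], "end": [22000, 35000]})

truth = pybedtools.BedTool.from_dataframe(true_df).sort().merge()
inferred = pybedtools.BedTool.from_dataframe(infer_df).sort().merge()

total_true = sum(x.stop - x.start for x in truth)                          # 15000 bp
total_inferred = sum(x.stop - x.start for x in inferred)                   # 15000 bp
true_positives = sum(x.stop - x.start for x in inferred.intersect(truth))  # 8000 bp

precision = true_positives / float(total_inferred) * 100  # ~53.3
recall = true_positives / float(total_true) * 100         # ~53.3
print(precision, recall)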
infer_label_list.append(0) - true_label_array = np.array(true_label_list) infer_label_array = np.array(infer_label_list) @@ -1947,186 +1129,22 @@ def cal_accuracy_haplotypes_phased(cutoff=0.5, seqlen = 50000, archaic_prop = 0. precision = precision_score(true_label_array, infer_label_array) recall = recall_score(true_label_array, infer_label_array) - - return precision, recall - - -def cal_accuracy_v2_wo_parallel(true_tracts, inferred_dfs, cutoff=0.5): - import pybedtools - import numpy as np - import pandas as pd - from copy import deepcopy - """ - Description: - Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples. - This version is not used anymore - - Arguments: - true_tracts list: list of dataframes containing true introgresssed tracts. - inferred_dfs DataFrame: dataframe containing inferred tracts - - Returns: - precision float: Amount of true introgressed tracts detected divided by amount of inferred introgressed tracts. - recall float: Amount ot true introgressed tracts detected divided by amount of true introgressed tracts. - """ - - true_tracts = pd.concat(deepcopy(true_tracts)) - - all_total_inferred_tracts = 0 - all_total_true_tracts = 0 - all_true_positives = 0 - - - for replicate in inferred_dfs: - - for sample in replicate: - - for haplo in sample: - haplo = pd.DataFrame(haplo) - - #apply cutoff - haplo = haplo[haplo["probabilities"] >= cutoff] - - if haplo.shape[0] > 0: - - curr_rep = int(haplo["rep"].unique()[0]) - curr_sample = haplo["sample"].unique()[0] - curr_haplo = int(haplo["haplo"].unique()[0]) - - conditions = (true_tracts["ind"].astype(str) == curr_sample) & (true_tracts["hap"].astype(int) == curr_haplo) & (true_tracts["rep"].astype(int) == curr_rep) - curr_truth_tracts = true_tracts[conditions] - true_tracts.drop(true_tracts[conditions].index) - - truth_tracts = curr_truth_tracts[["chr", "start", "end"]] - - inferred_tracts = haplo[["chrom", "tractstart", "tractend"]] - inferred_tracts.columns = ["chrom", "start", "end"] - truth_tracts.columns = ["chrom", "start", "end"] - - truth_tracts = pybedtools.BedTool.from_dataframe(truth_tracts).sort().merge() - inferred_tracts = pybedtools.BedTool.from_dataframe(inferred_tracts).sort().merge() - - total_inferred_tracts = sum([x.stop - x.start for x in (inferred_tracts)]) - total_true_tracts = sum([x.stop - x.start for x in (truth_tracts)]) - true_positives = sum([x.stop - x.start for x in inferred_tracts.intersect(truth_tracts)]) - - all_total_inferred_tracts = all_total_inferred_tracts + total_inferred_tracts - all_total_true_tracts = all_total_true_tracts + total_true_tracts - all_true_positives = all_true_positives + true_positives - - - for i, tract in true_tracts.iterrows(): - curr_truth_tracts = tract.to_frame().T - truth_tracts = curr_truth_tracts[["chr", "start", "end"]] - truth_tracts.columns = ["chrom", "start", "end"] - truth_tracts = pybedtools.BedTool.from_dataframe(truth_tracts).sort().merge() - total_true_tracts = sum([x.stop - x.start for x in (truth_tracts)]) - all_total_true_tracts = all_total_true_tracts + total_true_tracts - - - if float(all_total_inferred_tracts) == 0: precision = np.nan - else: precision = all_true_positives / float(all_total_inferred_tracts) * 100 - if float(all_total_true_tracts) == 0: recall = np.nan - else: recall = all_true_positives / float(all_total_true_tracts) * 100 - return precision, recall - -def cal_accuracy_v2_unphased_wo_parallel(true_tracts, inferred_dfs, cutoff=0.5): - import pybedtools - 
import numpy as np - import pandas as pd - from copy import deepcopy - """ - Description: - Helper function for calculating accuracy; in contrast to cal_accuracy from utils, it is iterated over all replicates and samples. - This version is not used anymore - - Arguments: - true_tracts list: list of dataframes containing true introgresssed tracts. - inferred_dfs DataFrame: dataframe containing inferred tracts - - Returns: - precision float: Amount of true introgressed tracts detected divided by amount of inferred introgressed tracts. - recall float: Amount ot true introgressed tracts detected divided by amount of true introgressed tracts. - """ - - true_tracts = pd.concat(deepcopy(true_tracts)) - - all_total_inferred_tracts = 0 - all_total_true_tracts = 0 - all_true_positives = 0 - - for replicate in inferred_dfs: - - for sample in replicate: - haplos = [] - for haplo in sample: - haplos.append(pd.DataFrame(haplo)) - haplo = pd.concat(haplos) - - #apply cutoff - haplo = haplo[haplo["probabilities"] >= cutoff] - - if haplo.shape[0] > 0: - - curr_rep = int(haplo["rep"].unique()[0]) - curr_sample = haplo["sample"].unique()[0] - - conditions = (true_tracts["ind"].astype(str) == curr_sample) & (true_tracts["rep"].astype(int) == curr_rep) - curr_truth_tracts = true_tracts[conditions] - true_tracts.drop(true_tracts[conditions].index) - - truth_tracts = curr_truth_tracts[["chr", "start", "end"]] - - inferred_tracts = haplo[["chrom", "tractstart", "tractend"]] - inferred_tracts.columns = ["chrom", "start", "end"] - truth_tracts.columns = ["chrom", "start", "end"] - - truth_tracts = pybedtools.BedTool.from_dataframe(truth_tracts).sort().merge() - inferred_tracts = pybedtools.BedTool.from_dataframe(inferred_tracts).sort().merge() - - total_inferred_tracts = sum([x.stop - x.start for x in (inferred_tracts)]) - total_true_tracts = sum([x.stop - x.start for x in (truth_tracts)]) - true_positives = sum([x.stop - x.start for x in inferred_tracts.intersect(truth_tracts)]) - - all_total_inferred_tracts = all_total_inferred_tracts + total_inferred_tracts - all_total_true_tracts = all_total_true_tracts + total_true_tracts - all_true_positives = all_true_positives + true_positives - - - for i, tract in true_tracts.iterrows(): - curr_truth_tracts = tract.to_frame().T - truth_tracts = curr_truth_tracts[["chr", "start", "end"]] - truth_tracts.columns = ["chrom", "start", "end"] - truth_tracts = pybedtools.BedTool.from_dataframe(truth_tracts).sort().merge() - total_true_tracts = sum([x.stop - x.start for x in (truth_tracts)]) - all_total_true_tracts = all_total_true_tracts + total_true_tracts - - - if float(all_total_inferred_tracts) == 0: precision = np.nan - else: precision = all_true_positives / float(all_total_inferred_tracts) * 100 - if float(all_total_true_tracts) == 0: recall = np.nan - else: recall = all_true_positives / float(all_total_true_tracts) * 100 - - return precision, recall - def split_dfs_sample_replicate(df, save_replicates=False): """ Description: - Helper function for partition the merged data for all replicates and all samples ( - creating complete DataFrames at first and afterwards split them is, of course, redundant work; - it would be certain beneficial to change the workflow a bit - especially for working on the cluster etc.) + Helper function for partition the merged data for all replicates and all samples Arguments: df DataFrame: list of dataframes containing true introgresssed tracts. 
+ save_replicates bool: if True, save replicate data in single csv files Returns: new_dfs list: list of DataFrames containing information about one sample of one replicate - """ - + replicates = [v for k, v in df.groupby('rep')] if save_replicates == True: @@ -2142,4 +1160,4 @@ def split_dfs_sample_replicate(df, save_replicates=False): new_samples.append(all_haplos) new_dfs.append(new_samples) - return new_dfs + return new_dfs \ No newline at end of file diff --git a/sstar/notebooks/archie_example.ipynb b/sstar/notebooks/archie_example.ipynb new file mode 100644 index 0000000..ba63f61 --- /dev/null +++ b/sstar/notebooks/archie_example.ipynb @@ -0,0 +1,910 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import sstar" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import preprocess\n", + "import train\n", + "import stats\n", + "import infer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "#parameters for training\n", + "\n", + "demo_model_file=\"./examples/models/archie2.yaml\"\n", + "nrep = 1000\n", + "nref = 10\n", + "ntgt= 1\n", + "ref_id = 'N1'\n", + "tgt_id = 'N2'\n", + "src_id = 'Na'\n", + "seq_len = 50000\n", + "mut_rate = 1.25e-8\n", + "rec_rate = 1e-8\n", + "thread = 6\n", + "output_prefix = \"example1\"\n", + "output_dir = \"example2\"\n", + "seed = None\n", + "preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "#create training folders\n", + "if not os.path.exists(output_dir):\n", + " os.makedirs(output_dir)\n", + "\n", + "#and for the test set\n", + "if not os.path.exists(output_dir + \"test\"):\n", + " os.makedirs(output_dir + \"test\") " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "train._simulation_manager_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#train logistisc classifier with simulated data\n", + "#train.train_parameters_archienew_fin(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#create training data and return dataframe\n", + "newdf = train._train_archie_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, drop_dynamic_cols=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " 0-ton 1-ton 2-ton pairwised_dist1 pairwised_dist2 \\\n", + "0 0 13 13 0.0 4.690416 \n", + "1 0 9 13 0.0 4.690416 \n", + "0 0 8 7 0.0 4.582576 \n", + "1 0 13 7 0.0 4.582576 \n", + "0 0 18 7 0.0 5.000000 \n", + ".. ... ... ... ... ... \n", + "1 0 8 12 0.0 3.605551 \n", + "0 0 7 8 0.0 3.464102 \n", + "1 0 5 8 0.0 3.464102 \n", + "0 0 37 16 0.0 8.485281 \n", + "1 0 35 16 0.0 8.485281 \n", + "\n", + " mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \\\n", + "0 2.345208 5.50 0.0 \n", + "1 2.345208 5.50 0.0 \n", + "0 2.291288 5.25 0.0 \n", + "1 2.291288 5.25 0.0 \n", + "0 2.500000 6.25 0.0 \n", + ".. ... ... ... \n", + "1 1.802776 3.25 0.0 \n", + "0 1.732051 3.00 0.0 \n", + "1 1.732051 3.00 0.0 \n", + "0 4.242641 18.00 0.0 \n", + "1 4.242641 18.00 0.0 \n", + "\n", + " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num label \n", + "0 -2.0 3.000000 0.0 0 0 \n", + "1 -2.0 1.414214 0.0 0 0 \n", + "0 -2.0 3.605551 0.0 5 0 \n", + "1 -2.0 4.242641 0.0 3 0 \n", + "0 -2.0 3.741657 0.0 8 0 \n", + ".. ... ... ... ... ... \n", + "1 -2.0 2.000000 0.0 3 0 \n", + "0 -2.0 2.828427 0.0 4 0 \n", + "1 -2.0 2.449490 0.0 4 0 \n", + "0 -2.0 6.244998 4296.0 13 0 \n", + "1 -2.0 7.211103 4296.0 18 0 \n", + "\n", + "[1972 rows x 13 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "newdf" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "output_dir_test = \"test_example1\"\n", + "output_prefix_test = \"pre_test_example1\"" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "#test_parameters\n", + "demo_model_file=\"./examples/models/archie2.yaml\"\n", + "nrep = 100\n", + "nref = 50\n", + "ntgt= 1\n", + "ref_id = 'N1'\n", + "tgt_id = 'N2'\n", + "src_id = 'Na'\n", + "seq_len = 500000\n", + "mut_rate = 1.25e-8\n", + "rec_rate = 1e-8\n", + "thread = 6\n", + "seed = None\n", + "preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix_test, output_dir_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "#load picklefile, alternatively use trained model from above\n", + "scikitfile = pd.read_pickle(\"archie_rtr_model_nref50_onemio.scikit.pickle\")\n", + "statsmodelsfile = pd.read_pickle(\"archie_rtr_model_nref50_onemio.statsmodels.pickle\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + "LogisticRegression(max_iter=1000, penalty=None, solver='newton-cg')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scikitfile" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "#create folders with testdata\n", + "infer.create_testdata_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix_test, output_dir_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#get all directories in test path\n", + "def listdir_fullpath(d):\n", + " return [os.path.join(d, f) for f in os.listdir(d)]\n", + "output_dirs_test = listdir_fullpath(output_dir_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Features taken from scikit model\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAHFCAYAAAAUpjivAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABiTklEQVR4nO3deVzT9R8H8NcYsAHiEBAGCIh4IOKFB2oqeJ+kmXllaVmZpUl2eFV4lKSZWZn+ysos86i0UjNTU9GSBEXywCOVSwVROZV7+/z+IJaTa+MabK/n47FH+d1n23vfXS8+38/n+5EIIQSIiIiIjJSZoQsgIiIiqk0MO0RERGTUGHaIiIjIqDHsEBERkVFj2CEiIiKjxrBDRERERo1hh4iIiIwaww4REREZNYYdIiIiMmoMOxX46quvIJFINBdzc3M0a9YMTz31FK5fv17n9UydOhXNmzfX6zbx8fGQSCT46quvaqWmykydOlVrH1paWsLb2xuvvvoqsrKyDFLT/craPyWve3x8vMHqMpSPP/4YLVu2hKWlJSQSCTIyMgxdUoO2du3aWvvsSSQSLFq0qFbuuzx79uyp9mMWFBTg+eefh4uLC6RSKTp16gQASEtLw4QJE+Dk5ASJRILRo0dXu14AmDNnDjp27Fgj91WRw4cPQyKR4PDhw5ptU6dORaNGjSq9bVBQEIKCgjT/zsnJwaJFi7Tuq6boWlN907x5c0ydOrXKtzevuVKM14YNG+Dj44Pc3FwcOXIEYWFhCA8Px5kzZ2BjY1Nndbz55puYPXu2XrdxcXFBREQEvL29a6mqyllZWeHgwYMAgIyMDPzwww94//33cfr0aezbt89gdZG2mJgYvPTSS3jmmWcwZcoUmJubw9bW1tBlNWhr166Fo6Njtb6k65M9e/bgk08+qVbgWbduHT799FN8/PHH6NKli+aHd+nSpfjxxx/x5ZdfwtvbG/b29jVS844dO/D000/XyH1VxN/fHxEREfD19dX7tmvXrtX6d05ODhYvXgwAWiGIqo5hRwd+fn7o2rUrAKBfv35QqVRYunQpfvrpJzz++ONl3iYnJwfW1tY1WkdVAotMJkOPHj1qtA59mZmZadUwdOhQXL16Ffv370dcXBy8vLwMWF39lpubCysrqzp5rHPnzgEAnn32WXTv3r1G7rM2PgfUsJ09exZWVlaYOXNmqe3e3t7lfqdWRVRUFBISEvDoo4/W2H2Wp3HjxlX+rq1KQCL98DBWFZS8oRMSEgD81y145swZDB48GLa2thgwYACA4i7bt99+Gz4+PpDJZGjatCmeeuop3Lp1q9T9bt68GT179kSjRo3QqFEjdOrUCV988YXm+rIOY33//fcICAiAQqGAtbU1WrRoofVXTHmHsf744w8MGDAAtra2sLa2Rq9evfDLL79otSk5nHPo0CHMmDEDjo6OcHBwwJgxY3Djxo0q7z8AmvB48+ZNre3btm1Dz549YWNjg0aNGmHIkCE4depUqdsfP34cwcHBcHBwgFwuh7e3N0JCQjTXX758GU899RRatWoFa2truLm5ITg4GGfOnKlW3Q+6cOECJk6cCGdnZ8hkMnh4eODJJ59Efn4+AGDRokWQSCSlblfWobLmzZtj5MiR2LFjBzp37gy5XI7Fixejc+fO6NOnT6n7UKlUcHNzw5gxYzTb9Hm/3S8oKAiTJ08GAAQEBEAikWj1Rnz55Zfo2LEj5HI57O3t8cgjj+D8+fNa91HR56Cq+w8o/hEcNWoUmjRpArlcjk6dOmHjxo1a91NyCGHz5s2YO3cuXFxc0KhRIwQHB+PmzZvIzs7Gc889B0dHRzg6OuKpp57C3bt3te5DIpFg5syZ+PTTT9G6dWvIZDL4+vpi69atWu10fU2bN2+Oc+fOITw8XHMY9/7Pb1ZWFl599VV4eXnB0tISbm5uCAkJwb1797TuNysrC88++ywcHBzQqFEjDB06FJcuXapwv94vIyMDr7zyClq0aAGZTAYnJycMHz4cFy5c0Np3Dx4yefC7Y+rUqfjkk080+6rkUvJ88/LyMH/+fK3n8+KLL2odCpVIJPj888+Rm5uruX3Jfjtw4ADOnz+v2V5Sz7p169CxY0c0atQItra28PHxwYIFC3R67tu3b0ebNm3Qrl27ctuo1Wq8/fbbaNOmDaysrGBnZ4cOHTrgww8/1GpX2Xu1vP34oD///BOOjo4YOXKk5rW+/zBWfHw8mjZtCgBYvHixZn9U1DtY8tibNm3CnDlzoFQqYWVlhcDAwDK/P4Hi78jhw4ejUaNGcHd3xyuvvKL1uSt5/ICAANjb26Nx48bw9/fHF198gQfXDz948CCCgoLg4OAAKysreHh44NFHH0VOTo6mja7fTYWFhXj99dehVCphbW2N3r17IzIyssJ9qgv27FTB5cuXAUDzhgSKX8iHH34Y06dPx7x581BUVAS1Wo1Ro0bh6NGjeP3119GrVy8kJCQgNDQUQUFB
[notebook output hunk omitted: added base64-encoded matplotlib "image/png" display_data blobs with empty "text/plain" entries; no figure captions or axis labels are recoverable from the encoded data]
xYmTpyI8PBwrFmzBjKZDOvXr9e+3rlzZ3h5eRlUS10N9brXvtbYNo31zDfkkUcewYsvvoguXbrgjTfegJeXV70eKUOvl5+fHxwcHODu7g4/Pz+4u7vD1dUV3bp1azR4de/eHRs3bsSOHTvw7bffwtnZGbGxsbh48SKAmp4+tVqt99zrvt81xauvvoqxY8ciLCwMS5YsweXLl3Hp0iVIpVLIZDKdXiR3d3ecO3cOe/fuxdq1azFw4ED069cP69atQ3n5H5M3vL29AQBt27aFn5+f9mtHR0d069bNIhfytKqem+joaPz44486bXv27EFUVJRZ7y/rM6l/EIaEeSNXeRshXq5Gvfma4hiWVIupzmdAqCe2pF5B38C2WP3Xfgw2FszF0R6Zb48yaJ8C1R2MWH5Qp5fPTgLsTRgKP5lzwzvq+d5NlZWVhTfffBO//vorlEolNBoNAEAulyMiIkK7XVRUlPbzwsJC5Ofn4+GHH2702L1799Z+7ubmhjZt2uj8FXyv1NRUXLp0CZs2bdK2CYIAjUaDnJwchIeHN/mc7q31+vXryMvLw4wZM/Dcc89p26urq+vdGoiOjtZ+7uDggKioKJw9e1bbtm/fvibX0Jim9Lob2jPfkHv/DWrfzOv+Gxh7ve41YMAAnDt3rtFaBg0ahEGDBmm/jo2NRb9+/fDpp59i5cqVOnXeSxAEoxa0u/fc/f39AdT87N7ba3Wv8+fPw8HBAf369dO2denSBe3atbvv9+rQocN9z18sooab0tJSXLp0Sft1Tk4O0tPT4enpiaCgIMyfPx9Xr17F119/DQCIj4/HZ599hoSEBDz33HM4evQo1q9fj2+//VasU9DhL3Np9huvKY5hSbWY6nwAoJ2blMHGwkkkkibfGqrVydsdiRN6YcG201ALgrZ3rpN302ajGGPcuHEIDAzE2rVrERAQAI1Gg4iICFRWVups5+bmpv3cxaVpP3t1/9CSSCTa8FSXRqPB888/j9mzZ9d7zdCBpPfWWvv9av8av1ftLYXGmHrV6YZ63YE/enCM6ZlvSFP+DUx5vQxhZ2eH/v37a3tuvLy8YG9vr/fc6/bmNMW9517779jQzx+ABseqGTKGzRKJGm5OnjyJYcOGab+uHRszdepUbNy4EQqFAnK5XPt6aGgodu7ciblz52LVqlUICAjAypUrRZ8GTkTNY6pevqYoKirC2bNn8cUXX2in2h4+fPi++7Vp0wYhISHYt2+fzu+t5ujXrx/OnDmDLl26mOR4tXx9fdGhQwdkZ2fj6aefbnTbX3/9FUOGDAFQ01ORmppab5B0c0VHR2PBggWorKzUPjtoz549CAgI0I5VEbNn3pDr1VyCICA9PR29evUCUDODLjIyEsnJyRg/frx2u+TkZDz22GMm/d5SqbTeM5u6d++O6upqpKWlITIyEgBw6dKletPFHR0dm/28J3MSNdw89NBDjabDjRs31msbOnQoTp061YJVEZEYTNnL15h27dqhffv2+PLLL+Hv7w+5XF5vMGdD3nrrLcTHx8PHxwdjxoxBSUkJjhw5gpdeesmoWt544w0MGjQIf/vb37SDV8+ePYvk5GR8+umnRh3z3lpnz54NDw8PjBkzBhUVFTh58iRu3rypM8li1apV6Nq1K8LDw/Hxxx/j5s2bePbZZ7WvP/zwwxg/fnyjgefSpUsoLS1FQUEBysvLtevc9OjRA1KpFJMnT8aSJUswbdo0LFiwABcvXsR7772HRYsWaXsXxO6Zb+r1utfx48cxZcoU7Nu3Dx06dNC7zZIlSzBo0CB07doVxcXFWLlyJdLT07Fq1SrtNgkJCXjmmWcQFRWF6OhofPnll5DL5SZf4yckJASlpaXYt28f+vTpA1dXV3Tv3h0jRozA//3f/2HNmjVwdHTEK6+8AhcXF50evNpgHxsbCycnJ7Rr1w5Xr17Fww8/jK+//hoDBgxo8PvW/jyUlpbi+vXrSE9Ph1QqRY8ePUx6fjqEVkalUgkABJVKJXYp1ERJJ+RC8Bs/CdM3HBe7FLpHeXm5kJmZKZSXl4tdisGSk5OF8PBwwcnJSejdu7dw4MABAYCwfft2QRAEIScnRwAgpKWl1dv3888/F7p16yY4OjoK/v7+wksvvaR97d5j1JLJZMKGDRsEQRCEn3/+WQAg3Lx5U/v68ePHhZEjRwru7u6Cm5ub0Lt3b+Ef//hHg7XX1vbzzz/ft9ZNmzYJDzzwgCCVSoV27doJQ4YMEbZt26az3+bNm4WBAwcKUqlUCA8PF/bt26dzjODgYGHx4sUN1iMIgjB06FABQL2PnJwc7Ta///67MHjwYMHJyUnw8/MT3nrrLUGj0egc58CBA0Lfvn0FqVQqhISECGvWrGn0++qrLzg4WPj44491tunTp492G2OvV93jCMIf/573nmddc+bMEYKCggSpVCp4e3sLcXFxQkpKSr3tVq1aJQQHBwtSqVTo16+fcPDgwUbPuyk/Bzdv3tTZRhAEIT4+Xmjfvr0AQHsu+fn5wpgxYwQnJychODhY2Lx5s+Dj4yN8/vnn2v127NghdOnSRXBwcBCCg4P11tAQfT8btceoq7HfK4a8f0vufuNWo7i4GDKZDCqVqkVmTpHp/ftkHl7f8juGd/fBV9P6i10O3XXnzh3k5OQgNDRUZ0EyalkHDhzA+PHjkZ2d3aRBnw2pncablpZmtY8SKC8vh6enJ3bu3GmyW4XWwlQ/B/pcuXIFgYGB2Lt3730H0JtaY79XDHn/tqrZUkRErd2uXbuwYMECk7+hWaODBw9i+PDhrS7YAKb9Odi/fz9KS0vRq1cvKBQKvP766wgJCdGOxbJGDDdERFZk6dKlYpdgMUaPHo3Ro0eLXYYoTPlzUFVVhQULFiA7Oxtt2rRBTEwMNm3aJPoSK83BcENE1AqFhIRY/XRfMo1Ro0Zh1CjD1qeydFa1QjERWR6+QRKRqZjq9wnDDREZpbbL+vZtPsSUiEyjdiHN5i6eyNtSRGQUe3t7tG3bVrvSrKurq8lXtiWi1kOj0eD69etwdXWFg0Pz4gnDDREZzc/PDwAafHYSEZEh7OzsEBQU1Ow/lBhuiMhoEokE/v7+8PHxQVVVldjlEJGVk0qlsLNr/ogZhhsiajZ7e3uTP2CQiMhYHFBMRERENoXhhoiIiGwKww0RERHZFIYbIiIisikMN0RERGRTGG6IiIjIpjDcEBERkU1huCEiIiKbwnBDRERENoXhhoiIiGwKww0RERHZFIYbIiIisikMN0RERGRTGG6IiIjIpjDcEBERkU1huCEiIiKbwnBDRERENoXhhoiIiGwKww0RERHZFIYbIiIisikMN0RERGRTGG6IiIjIpjDcEBERkU1huCEiIiKbwnBDRERENoXhhoiIiGwKww0RERHZFIYbIiIisikMN0RERGRTGG6IiIjIpjDcEBERkU1huCEiIiKbwnBDRERENoXhhoiIiGwKww0RERHZFIYbIiIisikMN2Q1bpZVQqEqF7sMIiKycAw3ZPGO59wAAKTl3ULs0v1IOiEXuSIiIrJkDDdk0RSqcmxNvaL9
WiMAC7adZg8OERE1iOGGLFqOsgxCnTa1ICBXeVuUeoiIyPIx3JBFC/Vyg6ROm71EghAvV1HqISIiy8dwQxbNX+aCiZEdtV/bSyR4b0IE/GUuIlZFRESWjOGGLN6AUE8AQN/Atjg8bxgm9Q8SuSIiIrJkDDdkNdq5SdljQ0RE98VwQ0RERDaF4YaIiIhsiujhZvXq1QgNDYWzszMiIyNx6NChRrfftGkT+vTpA1dXV/j7+2P69OkoKioyU7VERERk6UQNN0lJSZgzZw4WLlyItLQ0DB48GGPGjIFcrn8F2sOHD2PKlCmYMWMGzpw5g++//x4nTpzAzJkzzVw5ERERWSpRw83y5csxY8YMzJw5E+Hh4VixYgUCAwOxZs0avdv/+uuvCAkJwezZsxEaGooHH3wQzz//PE6ePGnmyomIiMhSiRZuKisrkZqairi4OJ32uLg4pKSk6N0nJiYGV65cwc6dOyEIAq5du4YtW7Zg7NixDX6fiooKFBcX63wQERGR7RIt3CiVSqjVavj6+uq0+/r6oqCgQO8+MTEx2LRpEyZNmgSpVAo/Pz+0bdsWn376aYPfJzExETKZTPsRGBho0vMgIiIiyyL6gGKJRHdxfUEQ6rXVyszMxOzZs7Fo0SKkpqZi165dyMnJQXx8fIPHnz9/PlQqlfYjLy/PpPUTERGRZXEQ6xt7eXnB3t6+Xi9NYWFhvd6cWomJiYiNjcVrr70GAOjduzfc3NwwePBgvPvuu/D396+3j5OTE5ycnEx/AkRERGSRROu5kUqliIyMRHJysk57cnIyYmJi9O5z+/Zt2Nnplmxvbw+gpseHiIiISNTbUgkJCVi3bh2++uornD17FnPnzoVcLtfeZpo/fz6mTJmi3X7cuHHYtm0b1qxZg+zsbBw5cgSzZ8/GgAEDEBAQINZpEBERkQUR7bYUAEyaNAlFRUV4++23oVAoEBERgZ07dyI4OBgAoFAodNa8mTZtGkpKSvDZZ5/hlVdeQdu2bTF8+HC8//77Yp0CERERWRiJ0Mru5xQXF0Mmk0GlUsHDw0PscqgJ/n0yD69v+R3Du/vgq2n9xS6HiIhEYMj7t+izpYiIiIhMieGGiIiIbArDDREREdkUhhsiIiKyKQw3REREZFMYboiIiMimMNwQERGRTWG4oVZDoSpHSpYSClW52KUQEVELEnWFYiJzSTohx/xtGdAIgJ0ESJzQC5P6B4ldFhERtQD23JDNU6jKMe9usAEAjQAs2HaaPThERDaK4YZsWrVag2W7L6DuQ0bUgoBc5W1xiiIiohbF21Jks3KVZZj773SkyW/Ve81eIkGIl6v5iyIiohbHnhuyOYIg4Lvjcjyy8hDS5LfQxtkBE/p10L5uJwHemxABf5mLiFUSEVFLYc8N2ZSi0grM25aB5MxrAICBoZ5YPukBeLpKse3UVQBA8tyh6OzjLmaZRETUghhuyGb8fK4Qr235HcrSCjjaS/BqXDfMHNwJ9nYSlFeqtdv5yZxFrJKIiFoaww1ZvfJKNf6xMxPf/CoHAIT5umPFpL7oEeAhcmUNU6jKkaMsQ6iXG2+PERGZGMMNWbXfr9zCnO/Ska0sAwA8GxuK10d3g7OjvciVNey743LM354BgWvuEBG1CIYbskrVag0+P5iFFXsvolojwNfDCcuefAAPdvUSu7RG/ff3fMzblqH9unbNnSFh3uzBISIyEYYbsjryotuY++90pF6+CQAY28sf/xgfgbauUpEra1hmfjHe33UOBy9cr/da7Zo7DDdERKbBcENW40ZZBb78JQuf7L2Isko12jg5YMljPTG+bwdIJBKxy9Mr78ZtLNtzHv/5LR+CANhLAHWdBQXFXHOHY3+IyBYx3JDFO55zAwCQnqdCep4KADAg1BPLn+qDju0scyE+ZWkFPtt/CZuOXUbV3TQzrk8AXhkZhs8PZuG7E3kAaoKNWGvu8HlbRGSrGG7IoilU5diaekWnTQJYbLAprajG2l+yse5QNsruTj8f3NULb4zujogOMgBAdOf2+O5EHiICPLB2apQowUahKtcGG4Bjf4jItjDckEXLUZahzl0cCADybpRbVLipqFZj8zE5Ptt/CUVllQCA3h1lmDe6O2K66B/k3NZVavYgodEI+OXidXy894I22NTi2B8ishUMN2TRQr3cYCeBzhuxJT0XSqMR8J/frmLZngu4crPmKeOdvNzw6qhuGBPhZzFjgcoqqrH11BVsTMlF9vUyvdtY0nUlImoOhhuyaP4yFyRO6IUF205DLQiijlEB/hiAG9LeFecLSvH+rnM4V1ACAPBp44Q5I8LwZFRHONpbxmPb5EW38c+jufj3iTyUVFQDANo4OeDJqEC0d5fiw93nAQASPm+LiGwIww1ZvEn9gzAkzBu5ytsI8XIV7Q343gG492rj7IAXHuqM6TGhcJGKv3igIAhIySrChiO52HfuGoS79XbycsPUmBBMjOwId6ea//VPXr6Bn89dR8LIMA4mJiKbwXBDVsFf5iJqr0LdAbi1Jg8MwuujulnEGjvllWpsT7uKjSk5uHCtVNs+NMwb02JDMLSrN+zsdG+TudxdyVnm4mjWWomIWhLDDdF9CIKAb4/L6wUbABjXO0C0YFN7i8zZwQ67M6/hu+N5UJVXAQBcpfZ4IrIjpkSHoAufgE5ErQzDDVEjCovv4M3/nMbuM9fqvSbmANykE3LM25ahveVUK9DTBVOjQ/BkVCB7Y4io1WK4IdJDEARsSb2Cd37KRPGdajjYSTC0mzd+PlcIjSDu4nu7TyvwxtaMeu3vT+yNJyI7wt7OMmZoERGJheGGqI6rt2rG1/xy9zlQvTrI8METvRHu7wGFqly0gc2pl29ixd4LOHRRqff1IE9XBhsiIjDcEGlpNAI2HZdj6c6zKKtUQ+pgh7kjwvDc4FA43J3aLcbA5jT5TXy896I2bFna86mIiCwNww0RgFxlGd7Y+juO3X2OVVRwO7z/RG909hZvMG563i2s2HsBB87fDTV2EjzRryNmDe+ClCylxaz9Q0RkaRhuqFVTawRsOJKDj/acx50qDVwc7fHG6G6YEh1Sb9q0ufx+5RZW7L2I/ecKAdSEmgl9O+Cl4V0R1L6md2aSp2Ws/UNEZIkYbqjVunitBK9t+R3pebcAALFd2mPphN4I9BTn9k7GFRVW7L2AfXdDjZ0EGN+3I14a3gUhXm71thd77R8iIkvFcEOtztWb5dh9pgCf7r+ESrUGbZwcsHBsOCb1DzTrs6Bu3a6EQlWOotJKrNh7EXvP1kw3t5MAj9/tqQnVE2pMqbyq5snltevjEBHZAoYbahW2pF7Rfh634hft5w9398G74807XuVoVhEA4HR+MaIT92vb7STAYw90wEvDu6CTGcb6JJ2Q4+dzNeN5lu+5AJ82TnwEAxHZBIYbsnkKVTkW7zhdr33Joz0wJTrErL01ClU5kk7k1WuP6+GL10d3N9tqwrWPk6glAFiw7TSGhHnzVhcRWT3LeHQxUQvKUZbpfXRCmK+HWYNNbS16SsH02FCzPiZB3zVRCwJylbfNVgMRUUthuCG
bF+rlhroTn8RaF8ZSarGUOoiIWgLDDdk8f5kLEif0gv3dXhox14WxlFpq66glkYBr5RCRzZAIQt1H79m24uJiyGQyqFQqeHh4iF0OmZGYj06w1FqmbzyOn89dxytxYXhpeFfR6iAiuh9D3r85oJhaDUtaF8ZSanFxtAeAZj9BXKEqR46yDKFebhZxXkTUujHcEFGzJJ2QY/62DGiEmunsiRN6cUo5EYmKY26IyGinLt/AvK0Z2plXGqFmSrlCVS5uYUTUqrHnhogMIggCTuTexIYjOdh1uqDe1PbaKeW8PUVEYmG4IaImuVOlxo+/5WNjSi7O5Bc3uJ0tTCnnGCIi68ZwQ0SNKiy+g29+vYxNx+QoKqsEADg52GFCvw6YFhOKfx3NxTfH5ADEnWZvKt/8ehlv/uc0BI4hIrJaDDdEpNdvebew4UgO/puhQJW65uaTv8wZU6JD8Of+gWjnJgUAxHbxwjfH5Oju2wYbnu1vlcGmslqDI5eUSDqZh12nC7TttWOI+FgKIuvCcENEWlVqDf53ugAbj+TglPyWtj0quB2mx4ZiVE9fONjrn4fg4eJoVQGgWq3B0ewi/PSbArvOFDT4ZHSOISKyPgw3RK1YeZUaAHC5qAyrfr6Efx29jILiOwAAR3sJxvUOwPTYUPTqKBOzTJNRawQcz7mBn37Px67TBdrbbADg5e6EoWFe2Hbqqs4gaVsYQ0TU2jDcELVSSSfk+PncdQDA+sO52nYvdyf8dVAQJg8Mgk8bZ5GqMx2NRsAp+U389LsC/81Q4HpJhfa1dq6OGB3hj3G9/TGwU3vY20nQxtkBG1MuA7CNMURErRHDDVErpFCVY/62jHrti/4UjqcHBcPJwV6EqpqvdpZTSHtXFJZU4qff8vHfDAUUqjvabTycHTCqpx/+1CcAMZ3bw7HObbbBXb2xMeUyOnu74ZuZAxlsiKwQww1RK5SjLNMuvHevcH+Z1Qabe1dKrsvdyQEje/jiT739MbirN6QO91+/1N3ZusYQEdEfGG6IWqFQLzfYSaATBJo7tqS4vAoKVbnZA8GNskr862guPt57sd5rD3f3wZNRgXiomzecHa0ztBGR4fj4BaJWyF/mgsQJvWAvkQBo3tiSI5eUAIBz10oQu3Q/kk7ITVqrPlVqDfZmXkP8v1Ix8L29eoMNAMwc3AmjI/yMCjald6r4GAkiK8WeG6JWalL/IAwJ80au8jZCvFyNCjYKVTk2HfsjzLT0ujBnFcXYknoF/0m/CmXpHzOdwnzdcfFaqUlmOR26WDPIOut6GWKX7ucifkRWSPSem9WrVyM0NBTOzs6IjIzEoUOHGt2+oqICCxcuRHBwMJycnNC5c2d89dVXZqqWyLb4y1wQ3bm90UEkR1nW4LOlTKWotAJfHc7B2JWHMOaTQ1h/OAfK0kp4uUsx88FQ/O/lwdgzdyiWTmx+T5RCVY5/Hr2s/ZoPAiWyTqL23CQlJWHOnDlYvXo1YmNj8cUXX2DMmDHIzMxEUJD+v5SeeuopXLt2DevXr0eXLl1QWFiI6upqM1dOREDN2B0JYJIek3uf59TezQk/ny/E1tQr2H+uENV3Bwc52kswItwXE/t1xNBu3joznUzRE5WjLINQJ61xET8i6yNquFm+fDlmzJiBmTNnAgBWrFiB3bt3Y82aNUhMTKy3/a5du3Dw4EFkZ2fD09MTABASEmLOkonoHv4yFzw9MKjZz5a6d6aTBICr1B5llWrt6706yPBEZEc82idA+9iHhuppTggJ9XKDRAKdgMNF/Iisj2i3pSorK5Gamoq4uDid9ri4OKSkpOjdZ8eOHYiKisIHH3yADh06ICwsDK+++irKyxvuMq6oqEBxcbHOBxGZTmwXLwBAd982ODxvmMHjU7Kvl2Le1j+mcAsAyirVaOcqxf8N6YTdc4bgx5cexNSYkEaDjSn4y1wwNTpY+zUX8SOyTqL13CiVSqjVavj6+uq0+/r6oqCgQO8+2dnZOHz4MJydnbF9+3YolUq8+OKLuHHjRoPjbhITE7FkyRKT109Eugx9ttSlwlJ88+tlJJ2Q1xu3AwCf/uUBPNjV23QFNhEX8SOyfqLPlpLcHQBYSxCEem21NBoNJBIJNm3aBJms5lk3y5cvxxNPPIFVq1bBxaX+L6H58+cjISFB+3VxcTECAwNNeAZE1FRVag2SM6/hX0cv42h2UYPb2Usk6OzjbsbK6uMifkTWS7Rw4+XlBXt7+3q9NIWFhfV6c2r5+/ujQ4cO2mADAOHh4RAEAVeuXEHXrl3r7ePk5AQnJyfTFk9EBilQ3cG3x+X49rgchXef7WQnAR4O98VfBwUj/1Y5/r79NNSCwFtBRNRsooUbqVSKyMhIJCcnY/z48dr25ORkPPbYY3r3iY2Nxffff4/S0lK4u9f8VXfhwgXY2dmhY8eOZqmbiJpGEASkZBXhX0cvI/nsNajvDqrxcpfiz/2D8JeBQejQ9o8A81C35s10MrXaRfwsoRYiMoyot6USEhLwzDPPICoqCtHR0fjyyy8hl8sRHx8PoOaW0tWrV/H1118DACZPnox33nkH06dPx5IlS6BUKvHaa6/h2Wef1XtLiojMp/bxC65SB2xNvYJvjl1G9vUy7esDQjzx1+hgjO7pp/fZTs2d6WQqXMSPyPqJGm4mTZqEoqIivP3221AoFIiIiMDOnTsRHFwzW0GhUEAu/2P1U3d3dyQnJ+Oll15CVFQU2rdvj6eeegrvvvuuWKdA1Ord+/iF6MT9cLCXoFpd00vjJrXHhH4d8ddBwejm10bMMpukoUX8WmrFZSJqGRJBqLtk1f2VlZVh6dKl2LdvHwoLC6HRaHRez87ONlmBplZcXAyZTAaVSgUPDw+xyyGyagpVOWIS99eb7dTJyw3THwzF+L4d4O4k+ryFJkvJUmLy2mP12r99bhCiO7cXoSIiqmXI+7dRv3VmzpyJgwcP4plnnoG/v3+Ds5uIyLbpe/wCAPxjfASiO3uZvZ7m4iJ+RLbBqHDzv//9D//9738RGxtr6nqIyIqEernBTgLtAnxAbRhwE6+oZqhdxG9jSs2tKc7cIrJORq1Q3K5dO+3jD4io9fKXuSBxQvMfWGlJBt9dOLCzt5tRKy4TkfiM6rl55513sGjRIvzzn/+Eqyu7a4laM1M8sNIScRE/IutlVLhZtmwZsrKy4Ovri5CQEDg6Ouq8furUKZMUR0TWwVKmcZsS17khsl5GhZvHH3/cxGUQEVkGrnNDZP2MmgpuzTgVnIgaolCVI2bp/nqzpQ7PG8YeHCKRtfhU8Fqpqak4e/YsJBIJevTogb59+zbncEREospRlqHun3tqQUCu8jbDDZEVMSrcFBYW4s9//jMOHDiAtm3bQhAEqFQqDBs2DN999x28vb1NXScRUYvjOjdEtsGoqeAvvfQSiouLcebMGdy4cQM3b97E6dOnUVxcjNmzZ5u6RiIis6hd56aWLUxtJ2qNjOq52bVrF/bu3Yvw8HBtW48ePbBq1SrExcWZrDgiInMb3NUbG1Muo7O3G76ZOZDBhsgKGdVzo9Fo6k3/BgBHR8
d6z5kiIiIiMiejws3w4cPx8ssvIz8/X9t29epVzJ07Fw8//LDJiiMiMre6U8GTTsiNOo5CVY6ULCUUqnJTlkdETWDUbanPPvsMjz32GEJCQhAYGAiJRAK5XI5evXrhm2++MXWNRERmoVCV459HL2u/1gjAgm2nMSTM26DbU0kn5Ji/LQMaAbCTgGvlEJmZUeEmMDAQp06dQnJyMs6dOwdBENCjRw+MGDHC1PUREZlNc6eCqzUCdmbk442tGdo2YwMSERmvWevcjBw5EiNHjjRVLUREojJmKrhGI+BE7g38N0OBnRkFUJZW1NuGa+UQmVeTw83KlSvxf//3f3B2dsbKlSsb3ZbTwYnIGtVOBd+YUnNrqqGp4BqNgLS8m/jxNwV2ZihQWPJHoGnj7ICSO9U623OtHCLzavLjF0JDQ3Hy5Em0b98eoaGhDR9QIkF2drbJCjQ1Pn6BiBqz7+w1zPjnyXpTwQVBQHreLfz0e02gUajuaPdp4+yAuB5++FNvf8R28cL2tCvaW1Mcc0NkGi3y+IWcnBy9nxMR2SpBEPD7lZpA89/fFbh664+ZT+5ODhjZwxdje/ljcJgXnBzsta9N6h+ERf85jYpqAZ//tR/ievqLUT5Rq9WsMTe11Go1MjIyEBwcjHbt2pnikEREorh3KnjM0p91XnOV2mNEuC/G9vbH0DBvODva6zsEkk7IUVFd0yke/80p9twQmZlR4WbOnDno1asXZsyYAbVajSFDhuDo0aNwdXXFTz/9hIceesjEZRIRtby6U8FrDe/ugycjO2JYd58GA829x5i/jbOliMRk1CJ+W7ZsQZ8+fQAAP/74I3Jzc3Hu3DnMmTMHCxcuNGmBRETmom8qOAA8N7gTxvTyv2+wqT2GpoHp5ERkHkaFG6VSCT8/PwDAzp078eSTTyIsLAwzZsxARkbGffYmIrJMoV5usJPothk608kUxyCi5jEq3Pj6+iIzMxNqtRq7du3SLt53+/Zt2Nvf/y8bIiJL5C9zQeKEXrCX1KQTY54KXnuMWnYS8MniRGZm1Jib6dOn46mnnoK/vz8kEol2Ib9jx46he/fuJi2QiMicJvUPwpAwb+QqbyPEy9WoUMLZUkTiMircvPXWW4iIiEBeXh6efPJJODk5AQDs7e0xb948kxZIRGRu/jKXZvW0cLYUkbiavIifreAifkTUkhSqcsQu3a8zqNheIsHhecN4a4qoGVpkET8+foGI6P4amy3FcENkHk0ONx9//DGefvppODs74+OPP25wO4lEwnBDRK1W7Wypuj03nC1FZD58/AIRkQnVzpa699lSnC1FZF5GTQUnIqKGTeofBCeHmunkn/+1HwcTE5mZUeHmiSeewNKlS+u1f/jhh3jyySebXRQRkTWrO1sq6YRc5IqIWhejws3BgwcxduzYeu2jR4/GL7/80uyiiIisVUPPllKoyhvZi4hMyahwU1paCqlUWq/d0dERxcXFzS6KiMha8dlSROIzKtxEREQgKSmpXvt3332HHj16NLsoIiJrxWdLEYnPqBWK33zzTUycOBFZWVkYPnw4AGDfvn349ttv8f3335u0QCIia8LZUkTiMyrcPProo/jhhx/w3nvvYcuWLXBxcUHv3r2xd+9eDB061NQ1EhFZrda1BjyRZeDjF4iITIiPXyBqGYa8fxu9zs2tW7ewbt06LFiwADdu3AAAnDp1ClevXjX2kEREVo8DionEZ9Rtqd9//x0jRoyATCZDbm4uZs6cCU9PT2zfvh2XL1/G119/beo6iYisAh+/QCQ+o3puEhISMG3aNFy8eBHOzs7a9jFjxnCdGyJq1WoHFNfigGIi8zMq3Jw4cQLPP/98vfYOHTqgoKCg2UUREdmK1jWqkcgyGBVunJ2d9S7Wd/78eXh7eze7KCIia1V3hWIBXKGYyNyMCjePPfYY3n77bVRVVQEAJBIJ5HI55s2bh4kTJ5q0QCIia8IBxUTiMyrcfPTRR7h+/Tp8fHxQXl6OoUOHokuXLmjTpg3+8Y9/mLpGIiKrwRWKicRn1GwpDw8PHD58GPv378epU6eg0WjQr18/jBgxwtT1ERFZFX+ZC8b37YCtp/5YFuPxvgEcUExkRgaHm+rqajg7OyM9PR3Dhw/XPn6BiIhqxtxsT9Nd7+uHtHy8OqobAw6RmRh8W8rBwQHBwcFQq9UtUQ8RkVUz5ZgbhaocKVlKDkYmMpBRY27+/ve/Y/78+dqViYmIqIapxtwknZAjdul+TF57DLFL9yPphNyEVRLZNqPG3KxcuRKXLl1CQEAAgoOD4ebmpvP6qVOnTFIcEZG1McWYm/MFxZi3NQO1HUAaoWY6+ZAwb97aImoCo8LN448/DolEglb2zE0iovsydsxN3o3bSM68huTMaziWU4S6v11rb20x3BDdn0Hh5vbt23jttdfwww8/oKqqCg8//DA+/fRTeHl5tVR9RERWpbExN/cGE0EQcCa/GHsyr2HPmQKcKyhp9LicTk7UdAaFm8WLF2Pjxo14+umn4eLigs2bN+OFF17A999/31L1ERFZlcYenFml1uBY9g3sySzA3sxryFfd0W5jJwGiQjwR18MXI3v4YuW+i5xOTmQkiWDAvaXOnTvjH//4B/785z8DAI4fP47Y2FjcuXMH9vb2LVakKRUXF0Mmk0GlUsHDw0PscojIBiWdkOONrTWPYLCTAH/uH4TSimr8fL4QJXeqtdu5ONpjSJgXRvbww/DuPvB0kwKoubUVu3R/vYB0eN4wBhxqtQx5/zao5yYvLw+DBw/Wfj1gwAA4ODggPz8fgYGBxlVLRGTDNAKw+fgfM5283KV4uHtN78yDXb3g7Fj/D8Om3toiIv0MCjdqtRpSqVT3AA4OqK6ubmAPIqLWpe6DM2s9PTAI4/t2QN+gdrCvO1e8jsZubRHR/RkUbgRBwLRp0+Dk5KRtu3PnDuLj43Wmg2/bts10FRIRWRF9vS4A8KfeAYgK8WzSMfgIB6LmMSjcTJ06tV7bX//6V5MVQ0Rk7UzR68JHOBA1j0HhZsOGDSYvYPXq1fjwww+hUCjQs2dPrFixQmdcT0OOHDmCoUOHIiIiAunp6Savi4jIGP4yFyRO6IUF205DLQiwl0jw3oQIg0IJx9wQNY9Ri/iZSlJSEubMmYPVq1cjNjYWX3zxBcaMGYPMzEwEBQU1uJ9KpcKUKVPw8MMP49q1a2asmIjo/ib1D8KQMG/kKm8jxMvV4EAS6uUGCaCzkJ9EAo65IWoio54tZSrLly/HjBkzMHPmTISHh2PFihUIDAzEmjVrGt3v+eefx+TJkxEdHX3f71FRUYHi4mKdDyKiluYvc0F05/am62nhgvBETSZauKmsrERqairi4uJ02uPi4pCSktLgfhs2bEBWVhYWL17cpO+TmJgImUym/eCUdSKydDnKsnpZRgCMerI4UWskWrhRKpVQq9Xw9fXVaff19UVBQYHefS5evIh58+Zh06ZNcHBo2h21+fPnQ6VSaT/y8vKaXTsRUUuqvS11L96WImo6UcfcAIBEovu/sCAI9dqAmjV2Jk+ejCVLliAsL
KzJx3dyctKZuk5EZJV4W4qoyUQLN15eXrC3t6/XS1NYWFivNwcASkpKcPLkSaSlpWHWrFkAAI1GA0EQ4ODggD179mD48OFmqZ2IqCU1dluKs6WI7k+021JSqRSRkZFITk7WaU9OTkZMTEy97T08PJCRkYH09HTtR3x8PLp164b09HQMHDjQXKUTEbWo2rVy7sUViomaTtTbUgkJCXjmmWcQFRWF6OhofPnll5DL5YiPjwdQM17m6tWr+Prrr2FnZ4eIiAid/X18fODs7FyvnYjImnGFYqLmETXcTJo0CUVFRXj77behUCgQERGBnTt3Ijg4GACgUCggl8vvcxQiItvCFYqJmkciCEKrGqZmyCPTiYjEkJKlxOS1x+q1f/vcIER3bi9CRUTiM+T9W9RF/IiIqD5OBSdqHoYbIiJr0Kr62Imah+GGiMjCcIViouZhuCEisjCcCk7UPAw3REQWxl/mgsQJvbTjbiQA3psQwZlSRE3EcENEZKGEOv8loqZhuCEisjAKVTnmb8vQaVuw7TQUqnKRKiKyLgw3REQWJkdZBk2d7hq1IHBAMVETMdwQEVkYrnND1DwMN0RE1oADb4iajOGGiMjCcJ0bouZhuCEisjBuUnu97a5S/somagr+n0JEZGHKKtV6229Xaow6nkJVjpQsZbNmW5niGETm4iB2AUREpKt2QPG9t6aMHVD83XE5FmzPgEYA7CRA4oRemNQ/yKBjJJ2QY/625h2DyJwYboiIrIEBA4oLS+4g5VIRkjML8N+MAm27RqhZL2dImPd9Vzu+WVaJ47k3sP9sIZJO5hl1DCKxMNwQEVmYxgYU6wsUxXeqcCz7Bo5cUiIlS4kL10obPHbtejl1j1NUWoHjOTdwLOcGfs0uwrmCEoOPQWQpGG6IiCzM/QYU36lS49TlmziSpcSRS0X4/cotnUX/JBKgh78Huvq444f0fL3HuV5SgWM5RTiWXRNmLhbWD0RdfNzRxcsNuzKvNVgLkSViuCEisjANDSjefDwPH+w+j5O5N1FRrTu4ONTLDTGd2yO2ixeiO7VHOzcpUrKUesNN/DenoFDdqdfezbcNBnbyxMDQ9hgQ6gnvNk5IyVLqDTfGDm4mMgeGGyIiC9NQz03SiT/Gvvi0cUJsFy9toAloW/8WUUPHUajuQCIBuvt5YGCoJwZ1qgkznm7SJh+DPTdkyRhuiIgsTEM9N1HB7TCuTwBiu7RHZ293SCR1H9LQtOO8GheGZwaFQObqaHQtxvTcKFTlyFGWIdTLjeN1qEUx3BARWZhQLzfYSaAzjsZOAnw6ua9BoUDfcewlEkyM7NikYAOYrueG08nJnNivSERkYfxlLkic0Av2d3tm7CUSJE7oZXBvh77jvDchwqDjmKLnJuPqLczbmqENWbXTybkgILUU9twQEVmgSf2DMCTMG7nK2wjxcjX6Nk5zj2Noz021WoNzBSVIk99E6uWbSJXfRN6N+iGG08mpJTHcEBFZKH+Zi0ne/JtznLyb+ntXrtwsR5/Adrh1uxJp8ltIvXwTp+Q3kZ53C7cb6O2pi4OSqaUw3BARUYMEQf/SyP86movlyReQdb2s3mttnBzwQFBbRAa3Q7+gdqhUazDznyfrbcfp5NRSGG6IiKhBQZ76n2f1a85N7eedvNzQ726QiQxuhy4+7rC3+2Mm1295N/Udgj031GIYboiIqEENDSh+rE8AHn0gAH2D2uldH+de97u1RWRqDDdERNSghqaTz3uke5PH8TR0a6uBZqJmY58gERE1yBTTyRu6tRXoad0zpRSqcqRkKTml3QKx54aIiBrV3OnktnhbiosSWjaGGyIiuq/mTCe3xNtSxjwKQlVehbOKYhzLLsLHey9q2zUCMG9bBoaEeXPdHgvBcENERC3K1LelmvuMqvv1umg0AvJu3kZmfjHOKoqRqSjBWUUxrt5q+PaTIACnLt/E2N4MN5aA4YaIiFqUKW9L/evoZSzacRqCAbeDNBoBuUVlOJNfjGM5N/DNr5f/eE0A5m3NQFFZJfJvleOsogTnFMUNzhLr0NYF7VwdcTq/uN5rHCBtORhuiIioRRl7W6q8Uo1MRTEyrtxCxtVipMlvIlv5x6KB+m4HVVSrcfFaKc7kq3Amvxhn7va+NLZqsgDgg13nddqkDnYI83VHuJ8Hwv090CPAA+F+HpC5OuK3vJt4bFVKveNY+wBpW8JwQ0RELaopt6VqgowKGVdUyLhajNNXVbhYWKIzBV0fQQCW7T4PSCQ4k1+MS4UlqFLX38nJwQ7d/T3gLrXHkayieq9393PH0DAfbZAJ9XKDo73+CcW2OEDa1jDcEBFRi2ooDPzr6GV8fVSOjKu3cKmwVG+Q8XJ3Qu+OMkR0kEFZUoHNx+X1ttly6qrO1zIXR/QM8Lj7IUPPu2HFwd4OClU5YhL3495vJQGwYfqAJo/fuXm70qB2Mj+GGyIialENvenXDSXebZzQq0NNkOl198PXwwmSu2vs/PjbVb3hJiJAhuHhPtpA06Gti3afuvxlLlg6sRfmb82ABjWLvSVO7MVZTjaG4YaIiFpUWxdHve09/T3wcA9f9O4gQ6+OMvh6ODd6nKgQT0iAer0ua6dGGhROmrtuD1k+hhsiImpRDYWSddOiDAoWpux1ac66PWT5GG6IiKhFmTKUsNeFmoLhhoiIWpwpQwl7Xeh+GG6IiMgsGErIXPhUcCIiIrIpDDdERERkUxhuiIiIyKYw3BAREYlIoSpHSpYSClXDTx0nw3BAMRERkUiSTsgxf1sGNAY85VwfhaocOcoyhHq5cdA2GG6IiIjMShAEKEsrcTynCG9szdC263vKeVOYKiDZEoYbIiKiFlBZrYH8RhkuFZYhW1mKrMIyZF0vRfb1UhTfqda7jyAApy7fxNje+sNNWUU1cpRl2o/M/GLsOlOgfd3YgGRrGG6IiIgMoLpdpbf90Hkl5DfKkX29FFnXyyC/cRtqfY86ByCRADJnB9wqrx9yrpfcwaXCEmRfL0NuUU2Iyb5e89/Ckor71ne/gNQaMNwQEREZoKhM/1POd5+9BpzVbXOT2qOTtzs6e7uhs7d7zec+bghp74bvU/Pw5g9n6h3nrR/Pot6B7tHeTYpQLzeEermhQFWOQ5eK6m2TW1Rm0DnZGoYbIiIiA4R6uept7+ztisFdfXSCjK+HEyQSicHfw01qj1BvN4R6uSPUyw2d7oaZEC83yO55yvqSH8/oDTdFpfoDWGvBcENERGSAkT38sOg/mfXav5k5yKBxLm3vCSn3em98BP4yIKhJoUhqr38bqYPhgcqWcJ0bIiIiA/jLXPD+xF6ojQ8SAO8b8ZTzqBBP1I0gEgDDuvs0ubenUq1/TE9ltf721oI9N0RERAYyxVPO/WUuWDqxF+ZvzYAGNb0NiQaGJPbc6MdwQ0REZARTPOW8uSGJPTf6MdwQERGJqDkhiT03+ok+5mb16tUIDQ2Fs7MzIiMjcejQoQa33bZtG0aOHAlvb294eHggOjoau3fvNmO1REREloM9N/qJGm6SkpIwZ84cLFy4EGlpaRg8eDDGjBkDuVyu
d/tffvkFI0eOxM6dO5Gamophw4Zh3LhxSEtLM3PlRERE4mPPjX4SQRBEi3cDBw5Ev379sGbNGm1beHg4Hn/8cSQmJjbpGD179sSkSZOwaNEiva9XVFSgouKPFR2Li4sRGBgIlUoFDw+P5p0AERGRiF7//jf8O/VKvfZJUR3x/hN9RKio5RQXF0MmkzXp/Vu0npvKykqkpqYiLi5Opz0uLg4pKSlNOoZGo0FJSQk8PT0b3CYxMREymUz7ERgY2Ky6iYiILMXtSv3PqGqovbUQLdwolUqo1Wr4+vrqtPv6+qKgoKCBvXQtW7YMZWVleOqppxrcZv78+VCpVNqPvLy8ZtVNRERElk302VJ1FyoSBKFJixd9++23eOutt/Cf//wHPj4+DW7n5OQEJyenZtdJRERkaVyd9L+Nu0pFf3sXlWhn7+XlBXt7+3q9NIWFhfV6c+pKSkrCjBkz8P3332PEiBEtWSYREZHFaueq/xEO7dz0t7cWot2WkkqliIyMRHJysk57cnIyYmJiGtzv22+/xbRp07B582aMHTu2pcskIiKyWPm3yvW2X22gvbUQtd8qISEBzzzzDKKiohAdHY0vv/wScrkc8fHxAGrGy1y9ehVff/01gJpgM2XKFHzyyScYNGiQttfHxcUFMplMtPMgIiISw/WSCoPaWwtRw82kSZNQVFSEt99+GwqFAhEREdi5cyeCg4MBAAqFQmfNmy+++ALV1dX429/+hr/97W/a9qlTp2Ljxo3mLp+IiEhUbg2MuXFv5WNuRF3nRgyGzJMnIiKyZLM2peKnjPozjMf19sOnkyNFqKjlWMU6N0REREQtgeGGiIjISt2pUuttL2+gvbVguCEiIrJSpRX6VyJuqL21YLghIiKyUg52+he9bai9tWC4ISIislLVGv1zghpqby0YboiIiKwUe270Y7ghIiKyUmWV+gcO326gvbVguCEiIrJSldUag9pbC4YbIiIisikMN0RERGRTGG6IiIjIpjDcEBERkU1huCEiIrJSVRr9A4er1BxQTERERFbodgOPWSjj4xeIiIjIGnGFYv0YboiIiMimMNwQERGRTWG4ISIislINjRtu5eOJGW6IiIisVUOPWaji4xeIiIjIGmkE/QOHG2pvLRhuiIiIyKYw3BAREZFNYbghIiKyUg3dflLzthQRERFZI3UDj19oqL21YLghIiKyUg1lmFaebRhuiIiIrBXDjX4MN0RERFaqoZE1Yo642Xe2AAu3/459ZwtEq8FBtO9MREREzWLqcPP9STl2nS7A6Ag/PBkVZPD+E1YfwSn5LQDApmN56BfUFttejDWyGuMx3BAREVmphu4+GXNXavD7+5F3sxwAsO/cdXy6/xL2JjyE4jtVKC6vQvGdaqjKaz+vQnF5tc5rWYWlyFQU6xzzlPwW9p0twMPhfkZUZDyGGyIiIht05JJSG0RqQsndcHI3kKjuhpLi8ioUlVRAXWd/+Y1yhP39f82uY8mOMww3RERE1HxPrztmsmO1cXaAh7MjPFwc4eHscPe/jvBw+aP9nZ8y9e4rv3nHZHU0FcMNERGRlXJ1lOB2lf4RNmG+7pBpQ4jj3c/vBpO77TKXmoBy+MJ1JO46X+8YSx7tgb8OCoG9neS+tTQUbsTAcENERGSlZjzYCZ/+nFWv/aVhnfHKqO5NPk7PABk2HZdDfqNc2xbk6YKpMaEmqdPcGG6IiIis1CujuuOrI7koq/xjxIyb1N6gYFPrl9eH4/uTcuw5cw1xPX2Nmi1lKRhuiIiIrNiZt0dj2e5z2H2mAKN6+hkVbGo9GRVk1aGmFsMNERGRlXtlVPdmhRpbwxWKiYiIyKYw3BAREZFNYbghIiIim8JwQ0RERDaF4YaIiIhsCsMNERER2RSGGyIiIrIpDDdERERkUxhuiIiIyKYw3BAREZFNYbghIiIim8JwQ0RERDaF4YaIiIhsCsMNERER2RSGGyIiIrIpDDdERERkUxhuiIiIyKYw3BAREZFNYbghIiIim8JwQ0RERDaF4YaIiIhsiujhZvXq1QgNDYWzszMiIyNx6NChRrc/ePAgIiMj4ezsjE6dOuHzzz83U6VERERkDUQNN0lJSZgzZw4WLlyItLQ0DB48GGPGjIFcLte7fU5ODh555BEMHjwYaWlpWLBgAWbPno2tW7eauXIiIiK6V+7SsQa1tySJIAiC2b/rXQMHDkS/fv2wZs0abVt4eDgef/xxJCYm1tv+jTfewI4dO3D27FltW3x8PH777TccPXpU7/eoqKhARUWF9uvi4mIEBgZCpVLBw8PDhGdDREREIfP+q/3clMGmuLgYMpmsSe/fovXcVFZWIjU1FXFxcTrtcXFxSElJ0bvP0aNH620/atQonDx5ElVVVXr3SUxMhEwm034EBgaa5gSIiIiontylY7UfYhEt3CiVSqjVavj6+uq0+/r6oqCgQO8+BQUFerevrq6GUqnUu8/8+fOhUqm0H3l5eaY5ASIiIrJIDmIXIJFIdL4WBKFe2/2219dey8nJCU5OTs2skoiIiKyFaD03Xl5esLe3r9dLU1hYWK93ppafn5/e7R0cHNC+ffsWq5WIiIish2jhRiqVIjIyEsnJyTrtycnJiImJ0btPdHR0ve337NmDqKgoODo6tlitREREZD1EnQqekJCAdevW4auvvsLZs2cxd+5cyOVyxMfHA6gZLzNlyhTt9vHx8bh8+TISEhJw9uxZfPXVV1i/fj1effVVsU6BiIiILIyoY24mTZqEoqIivP3221AoFIiIiMDOnTsRHBwMAFAoFDpr3oSGhmLnzp2YO3cuVq1ahYCAAKxcuRITJ04U6xSIiIjIwoi6zo0YDJknT0RERJbBKta5ISIiImoJDDdERERkUxhuiIiIyKYw3BAREZFNEX2FYnOrHT9dXFwsciVERETUVLXv202ZB9Xqwk1JSQkA8AGaREREVqikpAQymazRbVrdVHCNRoP8/Hy0adOm0WdYGaO4uBiBgYHIy8vjNPMWxOtsHrzO5sHrbD681ubRUtdZEASUlJQgICAAdnaNj6ppdT03dnZ26NixY4t+Dw8PD/6PYwa8zubB62wevM7mw2ttHi1xne/XY1OLA4qJiIjIpjDcEBERkU1huDEhJycnLF68GE5OTmKXYtN4nc2D19k8eJ3Nh9faPCzhOre6AcVERERk29hzQ0RERDaF4YaIiIhsCsMNERER2RSGGyIiIrIpDDcGWr16NUJDQ+Hs7IzIyEgcOnSo0e0PHjyIyMhIODs7o1OnTvj888/NVKl1M+Q6b9u2DSNHjoS3tzc8PDwQHR2N3bt3m7Fa62Xoz3OtI0eOwMHBAQ888EDLFmgjDL3OFRUVWLhwIYKDg+Hk5ITOnTvjq6++MlO11svQ67xp0yb06dMHrq6u8Pf3x/Tp01FUVGSmaq3TL7/8gnHjxiEgIAASiQQ//PDDffcR5X1QoCb77rvvBEdHR2Ht2rVCZmam8PLLLwtubm7C5cuX9W6fnZ0tuLq6Ci+//LKQmZkprF27VnB0dBS2bNli5sqti6HX+eWXXxbef/994fj
x48KFCxeE+fPnC46OjsKpU6fMXLl1MfQ617p165bQqVMnIS4uTujTp495irVixlznRx99VBg4cKCQnJws5OTkCMeOHROOHDlixqqtj6HX+dChQ4KdnZ3wySefCNnZ2cKhQ4eEnj17Co8//riZK7cuO3fuFBYuXChs3bpVACBs37690e3Feh9kuDHAgAEDhPj4eJ227t27C/PmzdO7/euvvy50795dp+35558XBg0a1GI12gJDr7M+PXr0EJYsWWLq0myKsdd50qRJwt///ndh8eLFDDdNYOh1/t///ifIZDKhqKjIHOXZDEOv84cffih06tRJp23lypVCx44dW6xGW9OUcCPW+yBvSzVRZWUlUlNTERcXp9MeFxeHlJQUvfscPXq03vajRo3CyZMnUVVV1WK1WjNjrnNdGo0GJSUl8PT0bIkSbYKx13nDhg3IysrC4sWLW7pEm2DMdd6xYweioqLwwQcfoEOHDggLC8Orr76K8vJyc5RslYy5zjExMbhy5Qp27twJQRBw7do1bNmyBWPHjjVHya2GWO+Dre7BmcZSKpVQq9Xw9fXVaff19UVBQYHefQoKCvRuX11dDaVSCX9//xar11oZc53rWrZsGcrKyvDUU0+1RIk2wZjrfPHiRcybNw+HDh2CgwN/dTSFMdc5Ozsbhw8fhrOzM7Zv3w6lUokXX3wRN27c4LibBhhznWNiYrBp0yZMmjQJd+7cQXV1NR599FF8+umn5ii51RDrfZA9NwaSSCQ6XwuCUK/tftvrayddhl7nWt9++y3eeustJCUlwcfHp6XKsxlNvc5qtRqTJ0/GkiVLEBYWZq7ybIYhP88ajQYSiQSbNm3CgAED8Mgjj2D58uXYuHEje2/uw5DrnJmZidmzZ2PRokVITU3Frl27kJOTg/j4eHOU2qqI8T7IP7+ayMvLC/b29vX+CigsLKyXSmv5+fnp3d7BwQHt27dvsVqtmTHXuVZSUhJmzJiB77//HiNGjGjJMq2eode5pKQEJ0+eRFpaGmbNmgWg5k1YEAQ4ODhgz549GD58uFlqtybG/Dz7+/ujQ4cOkMlk2rbw8HAIgoArV66ga9euLVqzNTLmOicmJiI2NhavvfYaAKB3795wc3PD4MGD8e6777Jn3UTEeh9kz00TSaVSREZGIjk5Wac9OTkZMTExeveJjo6ut/2ePXsQFRUFR0fHFqvVmhlznYGaHptp06Zh8+bNvGfeBIZeZw8PD2RkZCA9PV37ER8fj27duiE9PR0DBw40V+lWxZif59jYWOTn56O0tFTbduHCBdjZ2aFjx44tWq+1MuY63759G3Z2um+B9vb2AP7oWaDmE+19sEWHK9uY2qmG69evFzIzM4U5c+YIbm5uQm5uriAIgjBv3jzhmWee0W5fOwVu7ty5QmZmprB+/XpOBW8CQ6/z5s2bBQcHB2HVqlWCQqHQfty6dUusU7AKhl7nujhbqmkMvc4lJSVCx44dhSeeeEI4c+aMcPDgQaFr167CzJkzxToFq2Dodd6wYYPg4OAgrF69WsjKyhIOHz4sREVFCQMGDBDrFKxCSUmJkJaWJqSlpQkAhOXLlwtpaWnaKfeW8j7IcGOgVatWCcHBwYJUKhX69esnHDx4UPva1KlThaFDh+psf+DAAaFv376CVCoVQkJChDVr1pi5YutkyHUeOnSoAKDex9SpU81fuJUx9Of5Xgw3TWfodT579qwwYsQIwcXFRejYsaOQkJAg3L5928xVWx9Dr/PKlSuFHj16CC4uLoK/v7/w9NNPC1euXDFz1dbl559/bvT3raW8D0oEgf1vREREZDs45oaIiIhsCsMNERER2RSGGyIiIrIpDDdERERkUxhuiIiIyKYw3BAREZFNYbghIiIim8JwQ0RERDaF4YaICEBISAhWrFih/VoikeCHH34QrR4iMh7DDRGJbtq0aZBIJJBIJHBwcEBQUBBeeOEF3Lx5U+zSiMgKMdwQkUUYPXo0FAoFcnNzsW7dOvz444948cUXxS6LiKwQww0RWQQnJyf4+fmhY8eOiIuLw6RJk7Bnzx7t6xs2bEB4eDicnZ3RvXt3rF69Wmf/K1eu4M9//jM8PT3h5uaGqKgoHDt2DACQlZWFxx57DL6+vnB3d0f//v2xd+9es54fEZmPg9gFEBHVlZ2djV27dsHR0REAsHbtWixevBifffYZ+vbti7S0NDz33HNwc3PD1KlTUVpaiqFDh6JDhw7YsWMH/Pz8cOrUKWg0GgBAaWkpHnnkEbz77rtwdnbGP//5T4wbNw7nz59HUFCQmKdKRC2A4YaILMJPP/0Ed3d3qNVq3LlzBwCwfPlyAMA777yDZcuWYcKECQCA0NBQZGZm4osvvsDUqVOxefNmXL9+HSdOnICnpycAoEuXLtpj9+nTB3369NF+/e6772L79u3YsWMHZs2aZa5TJCIzYbghIoswbNgwrFmzBrdv38a6detw4cIFvPTSS7h+/Try8vIwY8YMPPfcc9rtq6urIZPJAADp6eno27evNtjUVVZWhiVLluCnn35Cfn4+qqurUV5eDrlcbpZzIyLzYrghIovg5uam7W1ZuXIlhg0bhiVLlmh7VtauXYuBAwfq7GNvbw8AcHFxafTYr732Gnbv3o2PPvoIXbp0gYuLC5544glUVla2wJkQkdgYbojIIi1evBhjxozBCy+8gA4dOiA7OxtPP/203m179+6NdevW4caNG3p7bw4dOoRp06Zh/PjxAGrG4OTm5rZk+UQkIs6WIiKL9NBDD6Fnz55477338NZbbyExMRGffPIJLly4gIyMDGzYsEE7Jucvf/kL/Pz88Pjjj+PIkSPIzs7G1q1bcfToUQA142+2bduG9PR0/Pbbb5g8ebJ2sDER2R6GGyKyWAkJCVi7di1GjRqFdevWYePGjejVqxeGDh2KjRs3IjQ0FAAglUqxZ88e+Pj44JFHHkGvXr2wdOlS7W2rjz/+GO3atUNMTAzGjRuHUaNGoV+/fmKeGhG1IIkgCILYRRARERGZCntuiIiIyKYw3BAREZFNYbghIiIim8JwQ0RERDaF4YaIiIhsCsMNERER2RSGGyIiIrIpDDdERERkUxhuiIiIyKYw3BAREZFNYbghIiIim/L/rhhnNdy9EO4AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chromstartendsample0-ton1-ton2-tonpairwised_dist1pairwised_dist2mean_pairwised_dist...min_dist_to_refS*_scoreprivate_SNP_numhaplorepoverlapoverlap_percentagelabel_one_1label_one_2label_one_3
01050000tsk_50010200.04.2426412.12132...3.316625020000001
11050000tsk_5008200.04.2426412.12132...3.741657031000001
2150000100000tsk_5001380.05.8309522.915476...2.44949030000001
3150000100000tsk_5002180.05.8309522.915476...2.645751041000001
41100000150000tsk_50013130.05.3851652.692582...2.236068020000001
..................................................................
151350000400000tsk_5002140.01.7320510.866025...2.00.0219900001
161400000450000tsk_50012100.04.8989792.44949...2.6457510.0009900001
171400000450000tsk_50012100.04.8989792.44949...3.7416570.0219900001
181450000500000tsk_5002090.06.633253.316625...3.7416570.0209900001
191450000500000tsk_5002490.06.633253.316625...1.4142140.0119900001
\n", + "

2000 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " chrom start end sample 0-ton 1-ton 2-ton pairwised_dist1 \\\n", + "0 1 0 50000 tsk_50 0 10 20 0.0 \n", + "1 1 0 50000 tsk_50 0 8 20 0.0 \n", + "2 1 50000 100000 tsk_50 0 13 8 0.0 \n", + "3 1 50000 100000 tsk_50 0 21 8 0.0 \n", + "4 1 100000 150000 tsk_50 0 13 13 0.0 \n", + ".. ... ... ... ... ... ... ... ... \n", + "15 1 350000 400000 tsk_50 0 2 14 0.0 \n", + "16 1 400000 450000 tsk_50 0 12 10 0.0 \n", + "17 1 400000 450000 tsk_50 0 12 10 0.0 \n", + "18 1 450000 500000 tsk_50 0 20 9 0.0 \n", + "19 1 450000 500000 tsk_50 0 24 9 0.0 \n", + "\n", + " pairwised_dist2 mean_pairwised_dist ... min_dist_to_ref S*_score \\\n", + "0 4.242641 2.12132 ... 3.316625 0 \n", + "1 4.242641 2.12132 ... 3.741657 0 \n", + "2 5.830952 2.915476 ... 2.44949 0 \n", + "3 5.830952 2.915476 ... 2.645751 0 \n", + "4 5.385165 2.692582 ... 2.236068 0 \n", + ".. ... ... ... ... ... \n", + "15 1.732051 0.866025 ... 2.0 0.0 \n", + "16 4.898979 2.44949 ... 2.645751 0.0 \n", + "17 4.898979 2.44949 ... 3.741657 0.0 \n", + "18 6.63325 3.316625 ... 3.741657 0.0 \n", + "19 6.63325 3.316625 ... 1.414214 0.0 \n", + "\n", + " private_SNP_num haplo rep overlap overlap_percentage label_one_1 \\\n", + "0 2 0 0 0 0 0 \n", + "1 3 1 0 0 0 0 \n", + "2 3 0 0 0 0 0 \n", + "3 4 1 0 0 0 0 \n", + "4 2 0 0 0 0 0 \n", + ".. ... ... .. ... ... ... \n", + "15 2 1 99 0 0 0 \n", + "16 0 0 99 0 0 0 \n", + "17 2 1 99 0 0 0 \n", + "18 2 0 99 0 0 0 \n", + "19 1 1 99 0 0 0 \n", + "\n", + " label_one_2 label_one_3 \n", + "0 0 1 \n", + "1 0 1 \n", + "2 0 1 \n", + "3 0 1 \n", + "4 0 1 \n", + ".. ... ... \n", + "15 0 1 \n", + "16 0 1 \n", + "17 0 1 \n", + "18 0 1 \n", + "19 0 1 \n", + "\n", + "[2000 rows x 23 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "infer.predict_introgression_folders(nrep,nref,ntgt, seq_len, thread, output_prefix_test, output_dirs_test, statsmodel=statsmodelsfile, scikitmodel=\"archie_rtr_model_nref50_onemio.scikit.pickle\", evaluate=True, simulated=True, compute_cutoffs=True, plot_curves=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sstar-analysis2", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/sstar/notebooks/archie_example_10000rep.ipynb b/sstar/notebooks/archie_example_10000rep.ipynb deleted file mode 100644 index 24cdff1..0000000 --- a/sstar/notebooks/archie_example_10000rep.ipynb +++ /dev/null @@ -1,8532 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import sstar" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from sstar import preprocess\n", - "from sstar import train\n", - "from sstar import stats\n", - "from sstar import infer" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "#parameters for training\n", - "\n", - "demo_model_file=\"./examples/models/archie2.yaml\"\n", - "nrep = 100\n", - "nref = 25\n", - "ntgt= 25\n", - "ref_id = 
'N1'\n", - "tgt_id = 'N2'\n", - "src_id = 'Na'\n", - "seq_len = 50000\n", - "mut_rate = 1.25e-8\n", - "rec_rate = 1e-8\n", - "thread = 6\n", - "output_prefix = \"ld\"\n", - "output_dir = \"v30tlabel\"\n", - "seed = None\n", - "preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.exists(output_dir):\n", - " os.makedirs(output_dir)\n", - "\n", - "#and for the test set\n", - "if not os.path.exists(output_dir + \"test\"):\n", - " os.makedirs(output_dir + \"test\") " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "#it would also be possible to run only the preprocess functions, but preprocessing is included in the train function\n", - "#preprocess.process_data(vcf_file, ref_ind_file, tgt_ind_file, anc_allele_file, output, win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_25 50000 1.00000 [1, 0, 0] 0\n", - "1 hap_0 tsk_33 50000 1.00000 [1, 0, 0] 0\n", - "2 hap_1 tsk_25 16966 0.33932 [0, 0, 1] 0\n", - "3 hap_1 tsk_31 50000 1.00000 [1, 0, 0] 0\n", - "4 hap_1 tsk_39 50000 1.00000 [1, 0, 0] 0\n", - "5 hap_1 tsk_41 16966 0.33932 [0, 0, 1] 0\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 17 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 28 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 27 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 23 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 26 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 23 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 26 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 29 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 26 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 27 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 27 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 30 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 28 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 17 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 32 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 23 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 17 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 30 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 24 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 30 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 31 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 28 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 30 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 31 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 17 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 30 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 19 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 28 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 26 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 17 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 31 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 26 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 30 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 18 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 25 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 27 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 26 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 23 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 24 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 26 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 24 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 18 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 28 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 29 0 0 0 0 \n", - "45 1 
0 50000 tsk_47 0 24 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 27 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 30 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 28 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 23 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 5.600992 2.888891 -2.816348 \\\n", - "1 ... 5.850740 1.588839 -2.498919 \n", - "2 ... 5.569553 3.200075 -1.555507 \n", - "3 ... 5.131926 4.203332 -1.635271 \n", - "4 ... 4.554107 3.120110 -1.078993 \n", - "5 ... 5.131926 4.203332 -1.635271 \n", - "6 ... 5.252602 1.670168 -1.356784 \n", - "7 ... 4.785933 3.674841 -0.928572 \n", - "8 ... 5.515987 2.793889 -2.405441 \n", - "9 ... 4.694202 2.784470 -0.990501 \n", - "10 ... 5.529553 3.564039 -1.638199 \n", - "11 ... 4.972101 2.298209 -1.879209 \n", - "12 ... 4.678477 3.931852 -1.040597 \n", - "13 ... 5.600992 2.888891 -2.816348 \n", - "14 ... 5.171819 2.392290 -0.917303 \n", - "15 ... 5.131926 4.203332 -1.635271 \n", - "16 ... 5.600992 2.888891 -2.816348 \n", - "17 ... 4.972101 2.298209 -1.879209 \n", - "18 ... 4.700898 2.161554 -1.365975 \n", - "19 ... 5.413104 2.318309 -2.705835 \n", - "20 ... 5.691373 1.548273 -1.946479 \n", - "21 ... 4.678477 3.931852 -1.040597 \n", - "22 ... 5.413104 2.318309 -2.705835 \n", - "23 ... 5.193599 1.766528 -2.237168 \n", - "24 ... 5.262966 1.361187 -3.608896 \n", - "25 ... 4.972101 2.298209 -1.879209 \n", - "26 ... 5.708955 1.067833 -3.519552 \n", - "27 ... 4.678477 3.931852 -1.040597 \n", - "28 ... 4.554107 3.120110 -1.078993 \n", - "29 ... 5.600992 2.888891 -2.816348 \n", - "30 ... 5.193599 1.766528 -2.237168 \n", - "31 ... 5.515987 2.793889 -2.405441 \n", - "32 ... 4.912628 3.406084 -0.901364 \n", - "33 ... 5.534359 1.230868 -2.583235 \n", - "34 ... 5.243058 0.850344 -3.649053 \n", - "35 ... 5.529553 3.564039 -1.638199 \n", - "36 ... 5.515987 2.793889 -2.405441 \n", - "37 ... 5.131926 4.203332 -1.635271 \n", - "38 ... 4.700898 2.161554 -1.365975 \n", - "39 ... 5.262966 1.361187 -3.608896 \n", - "40 ... 4.554107 3.120110 -1.078993 \n", - "41 ... 4.782687 2.065908 -0.710936 \n", - "42 ... 5.688591 0.939929 -4.146826 \n", - "43 ... 5.727112 2.340190 -2.351057 \n", - "44 ... 4.785933 3.674841 -0.928572 \n", - "45 ... 5.541681 0.909776 -3.764687 \n", - "46 ... 4.956227 2.215816 -0.962793 \n", - "47 ... 5.413104 2.318309 -2.705835 \n", - "48 ... 4.678477 3.931852 -1.040597 \n", - "49 ... 
5.131926 4.203332 -1.635271 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 6.466701 5.567764 29267.0 11 \\\n", - "1 7.665456 4.795832 5462.0 10 \n", - "2 1.222981 5.099020 23368.0 11 \n", - "3 1.359911 4.690416 23368.0 7 \n", - "4 0.383590 2.828427 0.0 2 \n", - "5 1.359911 4.690416 23368.0 7 \n", - "6 3.106220 3.464102 0.0 2 \n", - "7 -0.250950 3.316625 2378.0 8 \n", - "8 4.947958 3.316625 2083.0 4 \n", - "9 0.142808 3.000000 0.0 3 \n", - "10 1.522400 5.099020 23368.0 11 \n", - "11 4.219645 4.472136 23368.0 6 \n", - "12 0.060139 3.162278 2378.0 7 \n", - "13 6.466701 5.567764 29267.0 11 \n", - "14 0.597113 3.741657 2378.0 8 \n", - "15 1.359911 4.690416 23368.0 7 \n", - "16 6.466701 5.567764 29267.0 11 \n", - "17 4.219645 4.472136 23368.0 6 \n", - "18 1.850606 2.828427 0.0 1 \n", - "19 6.789710 4.472136 5462.0 6 \n", - "20 6.752086 4.000000 2082.0 6 \n", - "21 0.060139 3.162278 2378.0 7 \n", - "22 6.789710 4.472136 5462.0 6 \n", - "23 6.599473 4.795832 5462.0 4 \n", - "24 13.661067 4.242641 33940.0 4 \n", - "25 4.219645 4.472136 23368.0 6 \n", - "26 16.012644 2.645751 0.0 1 \n", - "27 0.060139 3.162278 2378.0 7 \n", - "28 0.383590 2.828427 0.0 2 \n", - "29 6.466701 5.567764 29267.0 11 \n", - "30 6.599473 4.795832 5462.0 4 \n", - "31 4.947958 3.316625 2083.0 4 \n", - "32 -0.298365 3.464102 2378.0 9 \n", - "33 9.809014 4.582576 947.0 4 \n", - "34 18.206224 2.449490 0.0 4 \n", - "35 1.522400 5.099020 23368.0 11 \n", - "36 4.947958 3.316625 2083.0 4 \n", - "37 1.359911 4.690416 23368.0 7 \n", - "38 1.850606 2.828427 0.0 1 \n", - "39 13.661067 4.242641 33940.0 4 \n", - "40 0.383590 2.828427 0.0 2 \n", - "41 0.563071 3.162278 0.0 3 \n", - "42 21.057346 5.000000 0.0 8 \n", - "43 4.847801 3.605551 2083.0 6 \n", - "44 -0.250950 3.316625 2378.0 8 \n", - "45 20.053455 4.472136 0.0 7 \n", - "46 0.568275 3.605551 2378.0 6 \n", - "47 6.789710 4.472136 5462.0 6 \n", - "48 0.060139 3.162278 2378.0 7 \n", - "49 1.359911 4.690416 23368.0 7 \n", - "\n", - " haplo rep label \n", - "0 0 0 0 \n", - "1 1 0 0 \n", - "2 0 0 0 \n", - "3 1 0 0 \n", - "4 0 0 0 \n", - "5 1 0 0 \n", - "6 0 0 0 \n", - "7 1 0 0 \n", - "8 0 0 0 \n", - "9 1 0 0 \n", - "10 0 0 0 \n", - "11 1 0 0 \n", - "12 0 0 0 \n", - "13 1 0 0 \n", - "14 0 0 0 \n", - "15 1 0 0 \n", - "16 0 0 0 \n", - "17 1 0 0 \n", - "18 0 0 0 \n", - "19 1 0 0 \n", - "20 0 0 0 \n", - "21 1 0 0 \n", - "22 0 0 0 \n", - "23 1 0 0 \n", - "24 0 0 0 \n", - "25 1 0 0 \n", - "26 0 0 0 \n", - "27 1 0 0 \n", - "28 0 0 0 \n", - "29 1 0 0 \n", - "30 0 0 0 \n", - "31 1 0 0 \n", - "32 0 0 0 \n", - "33 1 0 0 \n", - "34 0 0 0 \n", - "35 1 0 0 \n", - "36 0 0 0 \n", - "37 1 0 0 \n", - "38 0 0 0 \n", - "39 1 0 0 \n", - "40 0 0 0 \n", - "41 1 0 0 \n", - "42 0 0 0 \n", - "43 1 0 0 \n", - "44 0 0 0 \n", - "45 1 0 0 \n", - "46 0 0 0 \n", - "47 1 0 0 \n", - "48 0 0 0 \n", - "49 1 0 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_38 21438 0.42876 [0, 0, 1] 4\n", - "1 hap_0 tsk_45 21438 0.42876 [0, 0, 1] 4\n", - "2 hap_1 tsk_43 21438 0.42876 [0, 0, 1] 4\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 18 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 21 0 0 0 0 
\n", - "2 1 0 50000 tsk_26 0 16 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 16 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 17 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 20 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 18 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 20 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 20 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 20 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 21 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 17 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 21 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 16 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 19 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 21 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 19 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 22 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 19 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 16 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 25 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 17 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 19 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 12 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 17 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 18 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 15 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 22 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 12 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 19 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 25 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 20 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 17 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 20 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 17 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 22 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 22 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 15 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 18 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 15 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 19 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 22 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 20 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 21 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 17 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 17 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 17 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 16 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 22 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.405963 2.347493 -1.317250 \\\n", - "1 ... 4.948505 0.992293 -2.660500 \n", - "2 ... 4.186653 3.711940 -1.161950 \n", - "3 ... 4.354791 2.635797 -1.078511 \n", - "4 ... 4.256324 2.763706 -1.455907 \n", - "5 ... 4.526915 1.827042 -2.190856 \n", - "6 ... 4.618158 0.712614 -3.388615 \n", - "7 ... 4.889038 0.977306 -2.631105 \n", - "8 ... 4.699862 2.391296 -1.543825 \n", - "9 ... 4.526915 1.827042 -2.190856 \n", - "10 ... 4.448662 1.689404 -2.327747 \n", - "11 ... 4.256324 2.763706 -1.455907 \n", - "12 ... 4.912213 0.990162 -3.174903 \n", - "13 ... 4.186653 3.711940 -1.161950 \n", - "14 ... 4.642588 1.686381 -1.848470 \n", - "15 ... 4.912213 0.990162 -3.174903 \n", - "16 ... 4.647283 2.202760 -1.444431 \n", - "17 ... 5.097455 2.175955 -1.890433 \n", - "18 ... 4.642588 1.686381 -1.848470 \n", - "19 ... 4.186653 3.711940 -1.161950 \n", - "20 ... 5.219179 1.000176 -2.751553 \n", - "21 ... 4.339139 3.331875 -1.155651 \n", - "22 ... 4.836610 1.647204 -2.140916 \n", - "23 ... 4.282485 1.060324 -2.775291 \n", - "24 ... 4.256324 2.763706 -1.455907 \n", - "25 ... 4.418628 2.315730 -1.352540 \n", - "26 ... 4.676724 1.688250 -2.726861 \n", - "27 ... 4.545400 1.739341 -2.382841 \n", - "28 ... 4.282485 1.060324 -2.775291 \n", - "29 ... 4.607055 2.815042 -1.145322 \n", - "30 ... 5.401605 0.822667 -4.365945 \n", - "31 ... 4.526915 1.827042 -2.190856 \n", - "32 ... 4.339139 3.331875 -1.155651 \n", - "33 ... 4.759472 2.107424 -1.478128 \n", - "34 ... 4.327423 3.393410 -1.144391 \n", - "35 ... 
-    … [truncated cell output of the deleted notebook: for each simulation replicate (reps 4–27) the same block repeats — debug prints ("ambig", "Adding label 1", "einfach nur so da" / "just there", "show true tractl", "und der feature_df" / "and the feature_df"), the true introgressed-tract table with columns hap, ind, len, prop, label, rep, and a preview of the per-window feature_df with columns chrom, start, end, sample, 0-ton … 5-ton, …, mean_pairwised_dist, var_pairwised_dist, skew_pairwised_dist, kurtosis_pairwised_dist, min_dist_to_ref, S*_score, private_SNP_num, haplo, rep, label (shape: 50 rows x 115 columns)] …
label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_42 14191 0.28382 [0, 1, 0] 29\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 16 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 29 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 29 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 26 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 17 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 17 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 28 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 29 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 19 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 32 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 30 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 30 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 32 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 16 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 27 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 32 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 33 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 29 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 30 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 29 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 32 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 17 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 32 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 31 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 32 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 21 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 20 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 32 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 29 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 30 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 21 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 20 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 16 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 30 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 20 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 16 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 32 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 30 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 29 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 24 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 19 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 30 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 32 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 33 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 17 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 18 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 32 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 21 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 18 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.509047 4.968493 -0.976100 \\\n", - "1 ... 5.491271 2.145947 -2.339909 \n", - "2 ... 5.300374 2.766032 -1.857759 \n", - "3 ... 5.190732 3.036306 -1.461426 \n", - "4 ... 4.602168 4.920046 -0.982613 \n", - "5 ... 4.680440 4.313479 -0.926243 \n", - "6 ... 5.875862 1.454246 -2.686821 \n", - "7 ... 5.300374 2.766032 -1.857759 \n", - "8 ... 5.096625 2.964409 -1.023466 \n", - "9 ... 5.987094 2.134700 -2.767078 \n", - "10 ... 5.607837 1.852162 -2.215078 \n", - "11 ... 5.403127 3.746222 -1.495215 \n", - "12 ... 5.987094 2.134700 -2.767078 \n", - "13 ... 4.509047 4.968493 -0.976100 \n", - "14 ... 5.292592 2.928475 -1.498724 \n", - "15 ... 4.905432 5.556736 -1.289224 \n", - "16 ... 5.076421 4.769954 -1.255832 \n", - "17 ... 5.278297 4.119585 -1.513635 \n", - "18 ... 5.511282 2.165768 -1.510980 \n", - "19 ... 5.491271 2.145947 -2.339909 \n", - "20 ... 4.905432 5.556736 -1.289224 \n", - "21 ... 4.688019 4.282478 -0.936583 \n", - "22 ... 4.905432 5.556736 -1.289224 \n", - "23 ... 5.801591 1.121537 -3.309058 \n", - "24 ... 6.111010 1.355561 -2.714434 \n", - "25 ... 
4.889569 1.432117 -2.716161 \n", - "26 ... 4.925517 3.159283 -1.178551 \n", - "27 ... 4.905432 5.556736 -1.289224 \n", - "28 ... 5.300374 2.766032 -1.857759 \n", - "29 ... 5.346558 4.274316 -1.571455 \n", - "30 ... 4.889569 1.432117 -2.716161 \n", - "31 ... 4.925517 3.159283 -1.178551 \n", - "32 ... 4.509047 4.968493 -0.976100 \n", - "33 ... 5.346558 4.274316 -1.571455 \n", - "34 ... 4.847160 0.925038 -2.830929 \n", - "35 ... 4.509047 4.968493 -0.976100 \n", - "36 ... 4.905432 5.556736 -1.289224 \n", - "37 ... 5.346558 4.274316 -1.571455 \n", - "38 ... 5.278297 4.119585 -1.513635 \n", - "39 ... 4.602168 4.920046 -0.982613 \n", - "40 ... 5.635205 1.184469 -2.875815 \n", - "41 ... 4.926263 2.711937 -1.167739 \n", - "42 ... 5.111413 2.093457 -1.362693 \n", - "43 ... 4.905432 5.556736 -1.289224 \n", - "44 ... 5.076421 4.769954 -1.255832 \n", - "45 ... 4.602168 4.920046 -0.982613 \n", - "46 ... 4.739533 4.556830 -0.989437 \n", - "47 ... 4.905432 5.556736 -1.289224 \n", - "48 ... 5.173509 0.894802 -3.429326 \n", - "49 ... 4.739533 4.556830 -0.989437 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.708740 1.414214 0.0 1 \\\n", - "1 5.978973 3.872983 13322.0 13 \n", - "2 3.442242 3.000000 21725.0 4 \n", - "3 0.946321 4.358899 12073.0 12 \n", - "4 -0.699960 1.732051 0.0 2 \n", - "5 -0.822481 1.732051 0.0 2 \n", - "6 9.693425 2.449490 0.0 3 \n", - "7 3.442242 3.000000 21725.0 4 \n", - "8 -0.313458 3.000000 0.0 4 \n", - "9 8.758493 2.828427 3092.0 3 \n", - "10 5.657705 4.000000 13322.0 14 \n", - "11 0.860545 4.795832 12073.0 16 \n", - "12 8.758493 2.828427 3092.0 3 \n", - "13 -0.708740 1.414214 0.0 1 \n", - "14 1.164395 4.472136 12073.0 13 \n", - "15 0.064272 3.872983 3783.0 8 \n", - "16 0.018994 4.000000 3783.0 9 \n", - "17 0.878892 4.690416 12073.0 15 \n", - "18 2.238563 4.358899 12073.0 15 \n", - "19 5.978973 3.872983 13322.0 13 \n", - "20 0.064272 3.872983 3783.0 8 \n", - "21 -0.791386 1.732051 0.0 2 \n", - "22 0.064272 3.872983 3783.0 8 \n", - "23 15.253760 4.472136 3782.0 9 \n", - "24 12.802148 4.582576 4752.0 10 \n", - "25 8.684442 1.732051 31375.0 3 \n", - "26 0.409287 3.162278 0.0 2 \n", - "27 0.064272 3.872983 3783.0 8 \n", - "28 3.442242 3.000000 21725.0 4 \n", - "29 1.100729 4.795832 12073.0 16 \n", - "30 8.684442 1.732051 31375.0 3 \n", - "31 0.409287 3.162278 0.0 2 \n", - "32 -0.708740 1.414214 0.0 1 \n", - "33 1.100729 4.795832 12073.0 16 \n", - "34 11.348109 2.449490 0.0 2 \n", - "35 -0.708740 1.414214 0.0 1 \n", - "36 0.064272 3.872983 3783.0 8 \n", - "37 1.100729 4.795832 12073.0 16 \n", - "38 0.878892 4.690416 12073.0 15 \n", - "39 -0.699960 1.732051 0.0 2 \n", - "40 11.917677 3.316625 0.0 1 \n", - "41 0.470586 3.316625 0.0 2 \n", - "42 1.354936 4.242641 3783.0 7 \n", - "43 0.064272 3.872983 3783.0 8 \n", - "44 0.018994 4.000000 3783.0 9 \n", - "45 -0.699960 1.732051 0.0 2 \n", - "46 -0.647684 2.000000 0.0 3 \n", - "47 0.064272 3.872983 3783.0 8 \n", - "48 15.503263 3.316625 0.0 4 \n", - "49 -0.647684 2.000000 0.0 3 \n", - "\n", - " haplo rep label \n", - "0 0 29 0 \n", - "1 1 29 0 \n", - "2 0 29 0 \n", - "3 1 29 0 \n", - "4 0 29 0 \n", - "5 1 29 0 \n", - "6 0 29 0 \n", - "7 1 29 0 \n", - "8 0 29 0 \n", - "9 1 29 0 \n", - "10 0 29 0 \n", - "11 1 29 0 \n", - "12 0 29 0 \n", - "13 1 29 0 \n", - "14 0 29 0 \n", - "15 1 29 0 \n", - "16 0 29 0 \n", - "17 1 29 0 \n", - "18 0 29 0 \n", - "19 1 29 0 \n", - "20 0 29 0 \n", - "21 1 29 0 \n", - "22 0 29 0 \n", - "23 1 29 0 \n", - "24 0 29 0 \n", - "25 1 29 0 \n", - "26 0 29 0 \n", - "27 1 29 0 
\n", - "28 0 29 0 \n", - "29 1 29 0 \n", - "30 0 29 0 \n", - "31 1 29 0 \n", - "32 0 29 0 \n", - "33 1 29 0 \n", - "34 0 29 0 \n", - "35 1 29 0 \n", - "36 0 29 0 \n", - "37 1 29 0 \n", - "38 0 29 0 \n", - "39 1 29 0 \n", - "40 0 29 0 \n", - "41 1 29 0 \n", - "42 0 29 0 \n", - "43 1 29 0 \n", - "44 0 29 0 \n", - "45 1 29 0 \n", - "46 0 29 0 \n", - "47 1 29 0 \n", - "48 0 29 0 \n", - "49 1 29 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_28 50000 1.0000 [1, 0, 0] 31\n", - "1 hap_0 tsk_30 50000 1.0000 [1, 0, 0] 31\n", - "2 hap_0 tsk_35 21725 0.4345 [0, 0, 1] 31\n", - "3 hap_0 tsk_47 50000 1.0000 [1, 0, 0] 31\n", - "4 hap_1 tsk_25 21725 0.4345 [0, 0, 1] 31\n", - "5 hap_1 tsk_27 50000 1.0000 [1, 0, 0] 31\n", - "6 hap_1 tsk_39 28275 0.5655 [0, 0, 1] 31\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 18 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 16 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 17 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 17 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 21 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 21 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 21 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 19 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 16 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 18 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 21 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 18 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 21 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 19 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 18 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 18 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 18 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 17 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 16 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 19 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 16 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 18 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 17 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 21 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 21 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 19 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 18 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 19 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 21 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 18 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 18 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 21 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 21 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 16 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 18 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 19 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 18 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 18 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 20 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 21 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 22 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 20 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 18 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 21 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 18 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 21 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 19 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 18 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 19 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.719836 1.043150 -2.331749 \\\n", - "1 ... 4.842797 1.427316 -2.864109 \n", - "2 ... 4.080122 1.872602 -1.302695 \n", - "3 ... 4.718864 0.892321 -2.462398 \n", - "4 ... 4.690836 0.916054 -2.238632 \n", - "5 ... 5.086959 2.482847 -2.665092 \n", - "6 ... 5.086959 2.482847 -2.665092 \n", - "7 ... 4.290593 1.950813 -1.545242 \n", - "8 ... 4.328210 1.026598 -1.780849 \n", - "9 ... 4.278333 1.415867 -1.693592 \n", - "10 ... 5.086959 2.482847 -2.665092 \n", - "11 ... 
4.179409 1.972542 -1.479505 \n", - "12 ... 4.859013 1.389991 -2.899452 \n", - "13 ... 4.704527 1.067428 -2.438903 \n", - "14 ... 4.007865 2.457015 -1.727424 \n", - "15 ... 4.331242 1.680340 -1.761895 \n", - "16 ... 4.007865 2.457015 -1.727424 \n", - "17 ... 4.618451 1.029909 -2.204862 \n", - "18 ... 4.084446 1.277301 -1.566590 \n", - "19 ... 4.797901 1.340145 -2.715918 \n", - "20 ... 4.842797 1.427316 -2.864109 \n", - "21 ... 4.327113 1.356092 -1.373991 \n", - "22 ... 4.197807 1.418420 -1.623435 \n", - "23 ... 4.958898 2.649327 -2.214077 \n", - "24 ... 4.859013 1.389991 -2.899452 \n", - "25 ... 4.186948 1.949466 -1.646767 \n", - "26 ... 4.212610 1.013921 -1.671635 \n", - "27 ... 4.797901 1.340145 -2.715918 \n", - "28 ... 4.958898 2.649327 -2.214077 \n", - "29 ... 4.791001 0.926310 -2.795989 \n", - "30 ... 4.440176 1.004840 -1.603432 \n", - "31 ... 4.595633 1.320161 -2.601716 \n", - "32 ... 4.595633 1.320161 -2.601716 \n", - "33 ... 4.151196 1.447575 -1.434091 \n", - "34 ... 4.007865 2.457015 -1.727424 \n", - "35 ... 4.290593 1.950813 -1.545242 \n", - "36 ... 4.278333 1.415867 -1.693592 \n", - "37 ... 4.007865 2.457015 -1.727424 \n", - "38 ... 4.429747 1.657345 -1.397217 \n", - "39 ... 4.197807 1.418420 -1.623435 \n", - "40 ... 4.958898 2.649327 -2.214077 \n", - "41 ... 4.962924 0.849385 -3.090719 \n", - "42 ... 4.691127 1.033329 -2.217152 \n", - "43 ... 4.331242 1.680340 -1.761895 \n", - "44 ... 5.086959 2.482847 -2.665092 \n", - "45 ... 4.007865 2.457015 -1.727424 \n", - "46 ... 4.958898 2.649327 -2.214077 \n", - "47 ... 4.288616 0.967769 -1.587376 \n", - "48 ... 4.179409 1.972542 -1.479505 \n", - "49 ... 4.779532 0.756069 -3.073604 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 8.078404 2.236068 0.0 3 \\\n", - "1 8.315783 2.645751 3896.0 2 \n", - "2 0.644892 3.872983 0.0 4 \n", - "3 10.097263 4.242641 3726.0 8 \n", - "4 9.242398 3.464102 0.0 2 \n", - "5 5.797898 5.196152 36503.0 14 \n", - "6 5.797898 5.196152 36503.0 14 \n", - "7 1.678397 4.123106 27345.0 6 \n", - "8 4.457419 4.472136 0.0 6 \n", - "9 4.188343 3.741657 0.0 4 \n", - "10 5.797898 5.196152 36503.0 14 \n", - "11 1.378971 4.000000 0.0 5 \n", - "12 9.055619 2.828427 22087.0 3 \n", - "13 7.369120 2.000000 0.0 2 \n", - "14 1.723297 2.645751 27.0 3 \n", - "15 2.874988 4.690416 0.0 7 \n", - "16 1.723297 2.645751 27.0 3 \n", - "17 7.438338 2.000000 0.0 2 \n", - "18 2.162348 3.316625 27.0 3 \n", - "19 9.077292 3.316625 9616.0 7 \n", - "20 8.315783 2.645751 3896.0 2 \n", - "21 1.884726 4.472136 0.0 6 \n", - "22 3.859419 4.123106 0.0 6 \n", - "23 4.059411 2.000000 8281.0 3 \n", - "24 9.055619 2.828427 22087.0 3 \n", - "25 1.562327 2.828427 27.0 4 \n", - "26 4.388806 3.605551 27.0 5 \n", - "27 9.077292 3.316625 9616.0 7 \n", - "28 4.059411 2.000000 8281.0 3 \n", - "29 10.431410 4.242641 25830.0 8 \n", - "30 5.689717 2.828427 0.0 3 \n", - "31 7.515994 2.449490 1.0 2 \n", - "32 7.515994 2.449490 1.0 2 \n", - "33 1.545857 4.472136 0.0 5 \n", - "34 1.723297 2.645751 27.0 3 \n", - "35 1.678397 4.123106 27345.0 6 \n", - "36 4.188343 3.741657 0.0 4 \n", - "37 1.723297 2.645751 27.0 3 \n", - "38 1.326946 4.242641 0.0 7 \n", - "39 3.859419 4.123106 0.0 6 \n", - "40 4.059411 2.000000 8281.0 3 \n", - "41 14.239745 3.162278 14036.0 5 \n", - "42 7.307977 2.236068 0.0 2 \n", - "43 2.874988 4.690416 0.0 7 \n", - "44 5.797898 5.196152 36503.0 14 \n", - "45 1.723297 2.645751 27.0 3 \n", - "46 4.059411 2.000000 8281.0 3 \n", - "47 5.172303 3.162278 0.0 3 \n", - "48 1.378971 4.000000 0.0 5 \n", - "49 15.676479 
3.872983 14036.0 7 \n", - "\n", - " haplo rep label \n", - "0 0 31 0 \n", - "1 1 31 0 \n", - "2 0 31 0 \n", - "3 1 31 0 \n", - "4 0 31 0 \n", - "5 1 31 0 \n", - "6 0 31 0 \n", - "7 1 31 0 \n", - "8 0 31 0 \n", - "9 1 31 0 \n", - "10 0 31 0 \n", - "11 1 31 0 \n", - "12 0 31 0 \n", - "13 1 31 0 \n", - "14 0 31 0 \n", - "15 1 31 0 \n", - "16 0 31 0 \n", - "17 1 31 0 \n", - "18 0 31 0 \n", - "19 1 31 0 \n", - "20 0 31 0 \n", - "21 1 31 0 \n", - "22 0 31 0 \n", - "23 1 31 0 \n", - "24 0 31 0 \n", - "25 1 31 0 \n", - "26 0 31 0 \n", - "27 1 31 0 \n", - "28 0 31 0 \n", - "29 1 31 0 \n", - "30 0 31 0 \n", - "31 1 31 0 \n", - "32 0 31 0 \n", - "33 1 31 0 \n", - "34 0 31 0 \n", - "35 1 31 0 \n", - "36 0 31 0 \n", - "37 1 31 0 \n", - "38 0 31 0 \n", - "39 1 31 0 \n", - "40 0 31 0 \n", - "41 1 31 0 \n", - "42 0 31 0 \n", - "43 1 31 0 \n", - "44 0 31 0 \n", - "45 1 31 0 \n", - "46 0 31 0 \n", - "47 1 31 0 \n", - "48 0 31 0 \n", - "49 1 31 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_36 42542 0.85084 [1, 0, 0] 33\n", - "1 hap_0 tsk_39 50000 1.00000 [1, 0, 0] 33\n", - "2 hap_1 tsk_45 42542 0.85084 [1, 0, 0] 33\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 24 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 19 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 14 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 17 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 18 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 15 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 20 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 19 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 21 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 21 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 20 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 19 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 20 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 15 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 13 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 19 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 19 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 19 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 21 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 15 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 20 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 22 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 17 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 18 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 21 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 22 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 22 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 13 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 17 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 21 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 18 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 19 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 20 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 21 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 21 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 21 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 19 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 21 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 12 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 16 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 17 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 14 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 21 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 21 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 22 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 20 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 13 0 0 0 0 \n", - "48 
1 0 50000 tsk_49 0 22 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 22 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.511124 1.529757 -1.079893 \\\n", - "1 ... 4.263355 1.903804 -2.170661 \n", - "2 ... 4.204587 1.041449 -2.600189 \n", - "3 ... 4.936013 1.035781 -2.692710 \n", - "4 ... 4.544400 1.148429 -2.881526 \n", - "5 ... 4.901173 1.338503 -3.070232 \n", - "6 ... 3.819808 3.169068 -1.315376 \n", - "7 ... 4.263355 1.903804 -2.170661 \n", - "8 ... 4.001858 2.665130 -1.293746 \n", - "9 ... 4.001858 2.665130 -1.293746 \n", - "10 ... 3.819808 3.169068 -1.315376 \n", - "11 ... 4.161273 1.243805 -1.267995 \n", - "12 ... 3.819808 3.169068 -1.315376 \n", - "13 ... 4.901173 1.338503 -3.070232 \n", - "14 ... 4.431171 1.364727 -2.224429 \n", - "15 ... 4.263355 1.903804 -2.170661 \n", - "16 ... 4.263355 1.903804 -2.170661 \n", - "17 ... 4.298322 1.324424 -1.994339 \n", - "18 ... 4.776985 2.220417 -2.572052 \n", - "19 ... 4.650605 1.291871 -2.357712 \n", - "20 ... 3.819808 3.169068 -1.315376 \n", - "21 ... 4.223241 2.044234 -1.347892 \n", - "22 ... 5.106020 1.368563 -3.502643 \n", - "23 ... 4.043234 1.252259 -1.131236 \n", - "24 ... 4.776985 2.220417 -2.572052 \n", - "25 ... 4.193073 1.978136 -1.222721 \n", - "26 ... 4.250973 1.689226 -1.106600 \n", - "27 ... 4.478962 1.258900 -2.409351 \n", - "28 ... 5.510167 1.198065 -3.364872 \n", - "29 ... 4.001858 2.665130 -1.293746 \n", - "30 ... 4.544400 1.148429 -2.881526 \n", - "31 ... 4.298322 1.324424 -1.994339 \n", - "32 ... 3.819808 3.169068 -1.315376 \n", - "33 ... 4.776985 2.220417 -2.572052 \n", - "34 ... 4.039056 2.286027 -1.314819 \n", - "35 ... 4.039056 2.286027 -1.314819 \n", - "36 ... 4.480401 1.406005 -1.368842 \n", - "37 ... 4.776985 2.220417 -2.572052 \n", - "38 ... 4.366555 1.293198 -2.336571 \n", - "39 ... 5.174994 0.979441 -3.284026 \n", - "40 ... 5.126137 0.722724 -4.386731 \n", - "41 ... 5.106020 1.368563 -3.502643 \n", - "42 ... 4.204587 1.041449 -2.600189 \n", - "43 ... 4.001858 2.665130 -1.293746 \n", - "44 ... 4.039056 2.286027 -1.314819 \n", - "45 ... 4.193073 1.978136 -1.222721 \n", - "46 ... 3.819808 3.169068 -1.315376 \n", - "47 ... 4.910113 0.810793 -3.483800 \n", - "48 ... 4.223241 2.044234 -1.347892 \n", - "49 ... 
4.223241 2.044234 -1.347892 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 1.679547 3.464102 0.0 4 \\\n", - "1 4.471606 3.316625 33513.0 3 \n", - "2 8.874000 3.464102 0.0 3 \n", - "3 9.203758 3.464102 5203.0 5 \n", - "4 10.128584 2.828427 0.0 0 \n", - "5 10.116904 1.732051 0.0 1 \n", - "6 0.207212 4.000000 9892.0 4 \n", - "7 4.471606 3.316625 33513.0 3 \n", - "8 0.246351 4.123106 9892.0 5 \n", - "9 0.246351 4.123106 9892.0 5 \n", - "10 0.207212 4.000000 9892.0 4 \n", - "11 3.023699 3.741657 0.0 4 \n", - "12 0.207212 4.000000 9892.0 4 \n", - "13 10.116904 1.732051 0.0 1 \n", - "14 4.588847 2.449490 0.0 2 \n", - "15 4.471606 3.316625 33513.0 3 \n", - "16 4.471606 3.316625 33513.0 3 \n", - "17 5.494163 3.741657 0.0 4 \n", - "18 5.507078 4.000000 24964.0 5 \n", - "19 5.544369 2.828427 0.0 4 \n", - "20 0.207212 4.000000 9892.0 4 \n", - "21 2.023800 3.741657 0.0 3 \n", - "22 12.084130 4.582576 30584.0 9 \n", - "23 2.458457 3.605551 0.0 3 \n", - "24 5.507078 4.000000 24964.0 5 \n", - "25 0.991426 3.162278 0.0 3 \n", - "26 0.996821 3.464102 0.0 4 \n", - "27 5.653232 2.449490 0.0 3 \n", - "28 12.351485 4.582576 30584.0 12 \n", - "29 0.246351 4.123106 9892.0 5 \n", - "30 10.128584 2.828427 0.0 0 \n", - "31 5.494163 3.741657 0.0 4 \n", - "32 0.207212 4.000000 9892.0 4 \n", - "33 5.507078 4.000000 24964.0 5 \n", - "34 1.162232 3.000000 0.0 2 \n", - "35 1.162232 3.000000 0.0 2 \n", - "36 2.315877 2.645751 0.0 2 \n", - "37 5.507078 4.000000 24964.0 5 \n", - "38 5.138973 2.236068 0.0 2 \n", - "39 13.048590 2.645751 0.0 0 \n", - "40 23.616168 4.242641 5203.0 3 \n", - "41 12.084130 4.582576 30584.0 9 \n", - "42 8.874000 3.464102 0.0 3 \n", - "43 0.246351 4.123106 9892.0 5 \n", - "44 1.162232 3.000000 0.0 2 \n", - "45 0.991426 3.162278 0.0 3 \n", - "46 0.207212 4.000000 9892.0 4 \n", - "47 15.337191 2.645751 0.0 1 \n", - "48 2.023800 3.741657 0.0 3 \n", - "49 2.023800 3.741657 0.0 3 \n", - "\n", - " haplo rep label \n", - "0 0 33 0 \n", - "1 1 33 0 \n", - "2 0 33 0 \n", - "3 1 33 0 \n", - "4 0 33 0 \n", - "5 1 33 0 \n", - "6 0 33 0 \n", - "7 1 33 0 \n", - "8 0 33 0 \n", - "9 1 33 0 \n", - "10 0 33 0 \n", - "11 1 33 0 \n", - "12 0 33 0 \n", - "13 1 33 0 \n", - "14 0 33 0 \n", - "15 1 33 0 \n", - "16 0 33 0 \n", - "17 1 33 0 \n", - "18 0 33 0 \n", - "19 1 33 0 \n", - "20 0 33 0 \n", - "21 1 33 0 \n", - "22 0 33 0 \n", - "23 1 33 0 \n", - "24 0 33 0 \n", - "25 1 33 0 \n", - "26 0 33 0 \n", - "27 1 33 0 \n", - "28 0 33 0 \n", - "29 1 33 0 \n", - "30 0 33 0 \n", - "31 1 33 0 \n", - "32 0 33 0 \n", - "33 1 33 0 \n", - "34 0 33 0 \n", - "35 1 33 0 \n", - "36 0 33 0 \n", - "37 1 33 0 \n", - "38 0 33 0 \n", - "39 1 33 0 \n", - "40 0 33 0 \n", - "41 1 33 0 \n", - "42 0 33 0 \n", - "43 1 33 0 \n", - "44 0 33 0 \n", - "45 1 33 0 \n", - "46 0 33 0 \n", - "47 1 33 0 \n", - "48 0 33 0 \n", - "49 1 33 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_29 45721 0.91442 [1, 0, 0] 41\n", - "1 hap_0 tsk_43 45721 0.91442 [1, 0, 0] 41\n", - "2 hap_1 tsk_36 50000 1.00000 [1, 0, 0] 41\n", - "3 hap_1 tsk_45 45721 0.91442 [1, 0, 0] 41\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 33 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 35 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 33 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 36 0 0 0 
0 \n", - "4 1 0 50000 tsk_27 0 32 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 32 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 31 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 37 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 29 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 33 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 31 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 35 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 35 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 37 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 32 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 35 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 31 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 37 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 35 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 32 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 40 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 38 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 37 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 20 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 35 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 38 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 31 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 35 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 35 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 31 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 38 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 31 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 32 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 31 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 33 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 35 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 29 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 31 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 35 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 35 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 35 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 29 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 31 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 31 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 34 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 36 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 33 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 35 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 37 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 31 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 3.161054 1.987739 0.894719 \\\n", - "1 ... 2.862108 2.668339 0.587128 \n", - "2 ... 3.520128 1.788699 0.322231 \n", - "3 ... 3.305906 1.850984 1.123676 \n", - "4 ... 2.972155 2.186293 0.817489 \n", - "5 ... 3.410924 1.665599 0.713426 \n", - "6 ... 3.244745 1.811632 0.613804 \n", - "7 ... 3.132424 2.087921 1.109466 \n", - "8 ... 6.385895 2.840349 -3.323941 \n", - "9 ... 3.545052 1.572610 0.680174 \n", - "10 ... 2.694535 2.799481 0.414549 \n", - "11 ... 2.862108 2.668339 0.587128 \n", - "12 ... 3.122197 2.071886 0.896445 \n", - "13 ... 3.602318 1.483306 1.398219 \n", - "14 ... 3.388745 1.696404 0.579854 \n", - "15 ... 3.317316 1.775418 1.231663 \n", - "16 ... 2.694535 2.799481 0.414549 \n", - "17 ... 3.132424 2.087921 1.109466 \n", - "18 ... 3.122197 2.071886 0.896445 \n", - "19 ... 2.972155 2.186293 0.817489 \n", - "20 ... 3.641874 1.716750 1.276547 \n", - "21 ... 3.483271 1.806822 1.004043 \n", - "22 ... 3.573252 1.651868 1.086081 \n", - "23 ... 6.851506 1.716862 -3.739368 \n", - "24 ... 3.329731 1.452891 1.847995 \n", - "25 ... 3.483271 1.806822 1.004043 \n", - "26 ... 2.694535 2.799481 0.414549 \n", - "27 ... 2.862108 2.668339 0.587128 \n", - "28 ... 3.122197 2.071886 0.896445 \n", - "29 ... 2.694535 2.799481 0.414549 \n", - "30 ... 3.708496 1.627059 0.946663 \n", - "31 ... 2.694535 2.799481 0.414549 \n", - "32 ... 3.388745 1.696404 0.579854 \n", - "33 ... 2.694535 2.799481 0.414549 \n", - "34 ... 3.520128 1.788699 0.322231 \n", - "35 ... 2.862108 2.668339 0.587128 \n", - "36 ... 6.385895 2.840349 -3.323941 \n", - "37 ... 
3.244745 1.811632 0.613804 \n", - "38 ... 3.475141 1.423393 1.549778 \n", - "39 ... 2.862108 2.668339 0.587128 \n", - "40 ... 2.862108 2.668339 0.587128 \n", - "41 ... 6.385895 2.840349 -3.323941 \n", - "42 ... 2.694535 2.799481 0.414549 \n", - "43 ... 2.926780 1.813958 1.343012 \n", - "44 ... 3.854286 1.364477 0.575553 \n", - "45 ... 3.236394 1.745751 1.613029 \n", - "46 ... 3.403309 1.597486 1.510483 \n", - "47 ... 2.862108 2.668339 0.587128 \n", - "48 ... 3.132424 2.087921 1.109466 \n", - "49 ... 2.694535 2.799481 0.414549 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 1.823558 2.828427 0.0 3 \\\n", - "1 1.706403 1.000000 0.0 1 \n", - "2 2.449610 1.732051 21659.0 2 \n", - "3 3.030315 2.449490 0.0 1 \n", - "4 1.512533 2.645751 0.0 2 \n", - "5 2.460125 1.414214 0.0 1 \n", - "6 2.297570 1.000000 0.0 0 \n", - "7 3.203198 2.236068 932.0 3 \n", - "8 9.527393 6.244998 41443.0 17 \n", - "9 3.015958 2.645751 780.0 3 \n", - "10 0.861425 2.449490 0.0 1 \n", - "11 1.706403 1.000000 0.0 1 \n", - "12 2.764234 2.236068 0.0 0 \n", - "13 4.480304 3.000000 966.0 3 \n", - "14 2.912752 2.449490 780.0 2 \n", - "15 2.758146 3.000000 780.0 4 \n", - "16 0.861425 2.449490 0.0 1 \n", - "17 3.203198 2.236068 932.0 3 \n", - "18 2.764234 2.236068 0.0 0 \n", - "19 1.512533 2.645751 0.0 2 \n", - "20 3.588759 2.449490 932.0 5 \n", - "21 3.694541 2.449490 10203.0 4 \n", - "22 3.468412 2.645751 17979.0 4 \n", - "23 14.225131 6.633250 41443.0 17 \n", - "24 5.253239 2.645751 0.0 3 \n", - "25 3.694541 2.449490 10203.0 4 \n", - "26 0.861425 2.449490 0.0 1 \n", - "27 1.706403 1.000000 0.0 1 \n", - "28 2.764234 2.236068 0.0 0 \n", - "29 0.861425 2.449490 0.0 1 \n", - "30 3.569270 2.828427 17979.0 5 \n", - "31 0.861425 2.449490 0.0 1 \n", - "32 2.912752 2.449490 780.0 2 \n", - "33 0.861425 2.449490 0.0 1 \n", - "34 2.449610 1.732051 21659.0 2 \n", - "35 1.706403 1.000000 0.0 1 \n", - "36 9.527393 6.244998 41443.0 17 \n", - "37 2.297570 1.000000 0.0 0 \n", - "38 4.765461 2.645751 0.0 2 \n", - "39 1.706403 1.000000 0.0 1 \n", - "40 1.706403 1.000000 0.0 1 \n", - "41 9.527393 6.244998 41443.0 17 \n", - "42 0.861425 2.449490 0.0 1 \n", - "43 2.847205 2.000000 0.0 0 \n", - "44 3.601354 2.449490 15115.0 3 \n", - "45 3.625420 2.449490 0.0 3 \n", - "46 3.916345 2.236068 0.0 1 \n", - "47 1.706403 1.000000 0.0 1 \n", - "48 3.203198 2.236068 932.0 3 \n", - "49 0.861425 2.449490 0.0 1 \n", - "\n", - " haplo rep label \n", - "0 0 41 0 \n", - "1 1 41 0 \n", - "2 0 41 0 \n", - "3 1 41 0 \n", - "4 0 41 0 \n", - "5 1 41 0 \n", - "6 0 41 0 \n", - "7 1 41 0 \n", - "8 0 41 0 \n", - "9 1 41 0 \n", - "10 0 41 0 \n", - "11 1 41 0 \n", - "12 0 41 0 \n", - "13 1 41 0 \n", - "14 0 41 0 \n", - "15 1 41 0 \n", - "16 0 41 0 \n", - "17 1 41 0 \n", - "18 0 41 0 \n", - "19 1 41 0 \n", - "20 0 41 0 \n", - "21 1 41 0 \n", - "22 0 41 0 \n", - "23 1 41 0 \n", - "24 0 41 0 \n", - "25 1 41 0 \n", - "26 0 41 0 \n", - "27 1 41 0 \n", - "28 0 41 0 \n", - "29 1 41 0 \n", - "30 0 41 0 \n", - "31 1 41 0 \n", - "32 0 41 0 \n", - "33 1 41 0 \n", - "34 0 41 0 \n", - "35 1 41 0 \n", - "36 0 41 0 \n", - "37 1 41 0 \n", - "38 0 41 0 \n", - "39 1 41 0 \n", - "40 0 41 0 \n", - "41 1 41 0 \n", - "42 0 41 0 \n", - "43 1 41 0 \n", - "44 0 41 0 \n", - "45 1 41 0 \n", - "46 0 41 0 \n", - "47 1 41 0 \n", - "48 0 41 0 \n", - "49 1 41 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - 
"einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_30 50000 1.00000 [1, 0, 0] 46\n", - "1 hap_0 tsk_48 15393 0.30786 [0, 0, 1] 46\n", - "2 hap_1 tsk_28 50000 1.00000 [1, 0, 0] 46\n", - "3 hap_1 tsk_46 50000 1.00000 [1, 0, 0] 46\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 26 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 35 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 32 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 32 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 33 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 29 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 27 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 27 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 32 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 33 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 28 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 22 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 30 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 22 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 27 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 34 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 28 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 30 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 26 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 27 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 28 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 33 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 32 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 34 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 31 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 32 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 30 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 27 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 32 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 33 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 28 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 21 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 31 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 26 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 32 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 33 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 29 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 30 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 37 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 28 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 29 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 33 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 26 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 27 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 30 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 26 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 24 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 25 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 25 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 28 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 5.281686 4.343796 -1.123004 \\\n", - "1 ... 5.196001 2.801570 -0.319902 \n", - "2 ... 4.387027 4.833998 -0.907629 \n", - "3 ... 4.387027 4.833998 -0.907629 \n", - "4 ... 4.752447 3.094252 -0.688938 \n", - "5 ... 5.719744 2.644532 -1.803642 \n", - "6 ... 4.770507 3.162259 -0.937862 \n", - "7 ... 6.149128 2.308222 -3.376976 \n", - "8 ... 4.887508 2.472270 -0.913232 \n", - "9 ... 4.585775 4.010665 -0.806691 \n", - "10 ... 6.241033 2.129509 -3.361794 \n", - "11 ... 5.431066 2.303520 -1.452246 \n", - "12 ... 4.805050 3.871493 -0.828428 \n", - "13 ... 5.431066 2.303520 -1.452246 \n", - "14 ... 4.770507 3.162259 -0.937862 \n", - "15 ... 4.969083 3.788214 -0.546414 \n", - "16 ... 4.722034 3.102395 -0.611151 \n", - "17 ... 4.805050 3.871493 -0.828428 \n", - "18 ... 4.672050 3.171950 -0.869819 \n", - "19 ... 5.255760 4.136986 -0.974335 \n", - "20 ... 5.304812 4.858975 -1.142028 \n", - "21 ... 5.053319 2.063967 -0.849443 \n", - "22 ... 4.387027 4.833998 -0.907629 \n", - "23 ... 4.969083 3.788214 -0.546414 \n", - "24 ... 
4.427295 3.839056 -0.727158 \n", - "25 ... 4.387027 4.833998 -0.907629 \n", - "26 ... 4.805050 3.871493 -0.828428 \n", - "27 ... 5.352100 4.275028 -1.035466 \n", - "28 ... 4.387027 4.833998 -0.907629 \n", - "29 ... 5.447561 1.924081 -0.917874 \n", - "30 ... 4.722034 3.102395 -0.611151 \n", - "31 ... 5.190971 0.933817 -3.275301 \n", - "32 ... 4.942084 3.495808 -0.735360 \n", - "33 ... 5.281686 4.343796 -1.123004 \n", - "34 ... 4.387027 4.833998 -0.907629 \n", - "35 ... 4.752447 3.094252 -0.688938 \n", - "36 ... 5.518602 3.505031 -1.039972 \n", - "37 ... 5.557242 3.877059 -1.201243 \n", - "38 ... 5.369227 3.131406 -0.564308 \n", - "39 ... 5.304812 4.858975 -1.142028 \n", - "40 ... 5.719744 2.644532 -1.803642 \n", - "41 ... 4.585775 4.010665 -0.806691 \n", - "42 ... 4.867950 1.223066 -1.496476 \n", - "43 ... 6.149128 2.308222 -3.376976 \n", - "44 ... 5.557242 3.877059 -1.201243 \n", - "45 ... 4.672050 3.171950 -0.869819 \n", - "46 ... 5.356046 1.192776 -2.476009 \n", - "47 ... 5.199293 4.287350 -1.038693 \n", - "48 ... 5.578237 2.923273 -1.128918 \n", - "49 ... 5.304812 4.858975 -1.142028 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.113655 2.645751 0.0 2 \\\n", - "1 -0.151813 3.464102 0.0 4 \n", - "2 -0.453829 3.605551 0.0 4 \n", - "3 -0.453829 3.605551 0.0 4 \n", - "4 -0.049234 4.242641 32171.0 6 \n", - "5 3.517359 4.123106 15608.0 5 \n", - "6 0.476786 3.316625 0.0 3 \n", - "7 10.402392 5.656854 39306.0 15 \n", - "8 0.153884 4.123106 0.0 6 \n", - "9 -0.607225 3.741657 0.0 5 \n", - "10 10.377909 5.744563 39306.0 16 \n", - "11 1.851433 2.645751 0.0 2 \n", - "12 0.053711 1.414214 0.0 1 \n", - "13 1.851433 2.645751 0.0 2 \n", - "14 0.476786 3.316625 0.0 3 \n", - "15 -0.279101 2.449490 0.0 3 \n", - "16 0.025467 3.464102 0.0 2 \n", - "17 0.053711 1.414214 0.0 1 \n", - "18 0.274101 3.162278 0.0 2 \n", - "19 -0.569901 2.828427 0.0 2 \n", - "20 -0.097156 3.000000 0.0 3 \n", - "21 0.969608 4.690416 6026.0 8 \n", - "22 -0.453829 3.605551 0.0 4 \n", - "23 -0.279101 2.449490 0.0 3 \n", - "24 -0.727521 3.741657 0.0 4 \n", - "25 -0.453829 3.605551 0.0 4 \n", - "26 0.053711 1.414214 0.0 1 \n", - "27 -0.455675 2.828427 0.0 2 \n", - "28 -0.453829 3.605551 0.0 4 \n", - "29 2.544656 4.582576 6026.0 8 \n", - "30 0.025467 3.464102 0.0 2 \n", - "31 14.495006 4.123106 6026.0 4 \n", - "32 -0.144191 1.732051 0.0 2 \n", - "33 -0.113655 2.645751 0.0 2 \n", - "34 -0.453829 3.605551 0.0 4 \n", - "35 -0.049234 4.242641 32171.0 6 \n", - "36 -0.152354 3.162278 0.0 5 \n", - "37 0.414193 3.316625 34923.0 5 \n", - "38 -0.076609 3.000000 1614.0 6 \n", - "39 -0.097156 3.000000 0.0 3 \n", - "40 3.517359 4.123106 15608.0 5 \n", - "41 -0.607225 3.741657 0.0 5 \n", - "42 5.083396 3.605551 0.0 2 \n", - "43 10.402392 5.656854 39306.0 15 \n", - "44 0.414193 3.316625 34923.0 5 \n", - "45 0.274101 3.162278 0.0 2 \n", - "46 9.427823 4.000000 6026.0 6 \n", - "47 -0.440193 2.449490 0.0 1 \n", - "48 0.299099 2.000000 0.0 3 \n", - "49 -0.097156 3.000000 0.0 3 \n", - "\n", - " haplo rep label \n", - "0 0 46 0 \n", - "1 1 46 0 \n", - "2 0 46 0 \n", - "3 1 46 0 \n", - "4 0 46 0 \n", - "5 1 46 0 \n", - "6 0 46 0 \n", - "7 1 46 0 \n", - "8 0 46 0 \n", - "9 1 46 0 \n", - "10 0 46 0 \n", - "11 1 46 0 \n", - "12 0 46 0 \n", - "13 1 46 0 \n", - "14 0 46 0 \n", - "15 1 46 0 \n", - "16 0 46 0 \n", - "17 1 46 0 \n", - "18 0 46 0 \n", - "19 1 46 0 \n", - "20 0 46 0 \n", - "21 1 46 0 \n", - "22 0 46 0 \n", - "23 1 46 0 \n", - "24 0 46 0 \n", - "25 1 46 0 \n", - "26 0 46 0 \n", - "27 1 46 0 \n", - "28 0 46 0 
\n", - "29 1 46 0 \n", - "30 0 46 0 \n", - "31 1 46 0 \n", - "32 0 46 0 \n", - "33 1 46 0 \n", - "34 0 46 0 \n", - "35 1 46 0 \n", - "36 0 46 0 \n", - "37 1 46 0 \n", - "38 0 46 0 \n", - "39 1 46 0 \n", - "40 0 46 0 \n", - "41 1 46 0 \n", - "42 0 46 0 \n", - "43 1 46 0 \n", - "44 0 46 0 \n", - "45 1 46 0 \n", - "46 0 46 0 \n", - "47 1 46 0 \n", - "48 0 46 0 \n", - "49 1 46 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_30 12541 0.25082 [0, 1, 0] 50\n", - "1 hap_0 tsk_32 3775 0.07550 [0, 1, 0] 50\n", - "2 hap_0 tsk_34 12541 0.25082 [0, 1, 0] 50\n", - "3 hap_0 tsk_38 12541 0.25082 [0, 1, 0] 50\n", - "4 hap_0 tsk_45 15479 0.30958 [0, 0, 1] 50\n", - "5 hap_0 tsk_48 12541 0.25082 [0, 1, 0] 50\n", - "6 hap_1 tsk_27 15479 0.30958 [0, 0, 1] 50\n", - "7 hap_1 tsk_35 12541 0.25082 [0, 1, 0] 50\n", - "8 hap_1 tsk_40 12541 0.25082 [0, 1, 0] 50\n", - "9 hap_1 tsk_41 16654 0.33308 [0, 0, 1] 50\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 33 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 31 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 32 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 32 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 33 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 38 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 31 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 29 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 33 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 30 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 24 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 29 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 31 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 30 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 27 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 25 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 30 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 31 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 23 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 30 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 31 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 24 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 30 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 27 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 25 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 40 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 24 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 31 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 32 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 30 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 32 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 23 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 28 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 36 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 30 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 30 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 30 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 31 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 29 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 24 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 37 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 28 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 33 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 25 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 35 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 31 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 24 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 34 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 31 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 27 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 5.797286 2.591473 -1.631211 \\\n", - "1 ... 5.181589 2.191139 -1.649032 \n", - "2 ... 5.200263 4.357264 -0.529772 \n", - "3 ... 5.061263 3.063617 -0.985389 \n", - "4 ... 
6.047377 1.869235 -1.558658 \n", - "5 ... 6.582110 3.315828 -1.586521 \n", - "6 ... 5.805617 3.134814 -1.611823 \n", - "7 ... 4.956747 4.030663 -0.633738 \n", - "8 ... 5.011221 1.207668 -1.358260 \n", - "9 ... 4.839754 3.616778 -1.205333 \n", - "10 ... 4.816209 4.444132 -1.026221 \n", - "11 ... 4.956747 4.030663 -0.633738 \n", - "12 ... 5.073955 4.734979 -0.603584 \n", - "13 ... 4.839754 3.616778 -1.205333 \n", - "14 ... 4.927614 3.798619 -0.378844 \n", - "15 ... 5.013052 3.109308 -0.751902 \n", - "16 ... 5.078976 3.764000 -0.585276 \n", - "17 ... 5.805617 3.134814 -1.611823 \n", - "18 ... 4.765779 4.087355 -0.898557 \n", - "19 ... 5.032715 4.471778 -0.504491 \n", - "20 ... 5.073955 4.734979 -0.603584 \n", - "21 ... 4.816209 4.444132 -1.026221 \n", - "22 ... 4.839754 3.616778 -1.205333 \n", - "23 ... 5.020937 3.790194 -0.628032 \n", - "24 ... 4.923298 3.481141 -0.660506 \n", - "25 ... 6.893947 1.673502 -2.915960 \n", - "26 ... 4.816209 4.444132 -1.026221 \n", - "27 ... 5.221589 1.855012 -1.358758 \n", - "28 ... 5.511319 1.665366 -1.353762 \n", - "29 ... 5.078976 3.764000 -0.585276 \n", - "30 ... 5.200263 4.357264 -0.529772 \n", - "31 ... 4.765779 4.087355 -0.898557 \n", - "32 ... 5.008841 3.671514 -0.470421 \n", - "33 ... 6.238317 2.363397 -1.363993 \n", - "34 ... 4.839754 3.616778 -1.205333 \n", - "35 ... 5.065957 3.936081 -0.421287 \n", - "36 ... 5.016684 4.152880 -0.403030 \n", - "37 ... 5.074515 4.689297 -0.582045 \n", - "38 ... 5.180081 3.126765 -0.544042 \n", - "39 ... 4.890782 3.320251 -0.752588 \n", - "40 ... 6.507654 3.330442 -1.537248 \n", - "41 ... 4.927677 3.758000 -0.367715 \n", - "42 ... 5.710630 1.468707 -2.032144 \n", - "43 ... 4.998411 3.215886 -0.791984 \n", - "44 ... 6.254330 2.763360 -1.465912 \n", - "45 ... 5.074515 4.689297 -0.582045 \n", - "46 ... 4.816209 4.444132 -1.026221 \n", - "47 ... 5.565984 0.899825 -3.897117 \n", - "48 ... 5.181589 2.191139 -1.649032 \n", - "49 ... 
-    [collapsed deleted notebook output: for each simulation replicate (reps 50–67) the cell printed the full feature_df (50 rows x 115 columns — chrom, start, end, sample, 0-ton…5-ton counts, pairwise-distance statistics (mean/var/skew/kurtosis), min_dist_to_ref, S*_score, private_SNP_num, haplo, rep, label), the true-tract table (hap, ind, len, prop, label, rep), and debug prints ("einfach nur so da", "ambig", "Adding label 1", "show true tractl", "und der feature_df")]
5.262533 2.345746 -2.042733 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.687384 3.464102 0.0 1 \\\n", - "1 4.745557 4.358899 12363.0 5 \n", - "2 -0.272716 2.000000 0.0 1 \n", - "3 -0.306754 3.605551 0.0 1 \n", - "4 -0.611339 3.741657 1.0 4 \n", - "5 -0.798617 3.872983 1.0 5 \n", - "6 0.167012 2.236068 0.0 2 \n", - "7 -0.611339 3.741657 1.0 4 \n", - "8 -0.611339 3.741657 1.0 4 \n", - "9 -0.454817 3.162278 0.0 2 \n", - "10 6.138802 2.236068 0.0 1 \n", - "11 -0.611339 3.741657 1.0 4 \n", - "12 3.592626 3.872983 7782.0 2 \n", - "13 0.077650 2.449490 0.0 3 \n", - "14 -0.611339 3.741657 1.0 4 \n", - "15 5.285226 3.872983 0.0 3 \n", - "16 -0.454817 3.162278 0.0 2 \n", - "17 5.247762 2.449490 23699.0 4 \n", - "18 -0.687384 3.464102 0.0 1 \n", - "19 4.368413 4.358899 12363.0 4 \n", - "20 4.340064 4.242641 12363.0 4 \n", - "21 3.592626 3.872983 7782.0 2 \n", - "22 -0.601186 3.316625 0.0 3 \n", - "23 5.041976 2.236068 23699.0 3 \n", - "24 17.646202 5.656854 36921.0 12 \n", - "25 6.138802 2.236068 0.0 1 \n", - "26 17.646202 5.656854 36921.0 12 \n", - "27 5.110489 2.000000 23699.0 2 \n", - "28 -0.687384 3.464102 0.0 1 \n", - "29 15.953219 4.123106 0.0 1 \n", - "30 -0.075334 3.605551 0.0 2 \n", - "31 0.167012 2.236068 0.0 2 \n", - "32 5.110489 2.000000 23699.0 2 \n", - "33 4.812197 3.741657 0.0 2 \n", - "34 8.844705 3.605551 0.0 1 \n", - "35 17.988515 4.123106 0.0 3 \n", - "36 5.469780 2.000000 0.0 2 \n", - "37 -0.322734 3.464102 0.0 1 \n", - "38 0.640717 2.828427 0.0 2 \n", - "39 8.844705 3.605551 0.0 1 \n", - "40 15.953219 4.123106 0.0 1 \n", - "41 3.592626 3.872983 7782.0 2 \n", - "42 -0.454817 3.162278 0.0 2 \n", - "43 12.748274 5.291503 1.0 5 \n", - "44 14.515418 4.000000 0.0 1 \n", - "45 -0.459438 3.316625 0.0 1 \n", - "46 8.468784 4.690416 12363.0 2 \n", - "47 9.947917 4.000000 0.0 0 \n", - "48 3.321788 4.000000 7782.0 3 \n", - "49 3.515515 4.123106 7782.0 4 \n", - "\n", - " haplo rep label \n", - "0 0 67 0 \n", - "1 1 67 0 \n", - "2 0 67 0 \n", - "3 1 67 0 \n", - "4 0 67 0 \n", - "5 1 67 0 \n", - "6 0 67 0 \n", - "7 1 67 0 \n", - "8 0 67 0 \n", - "9 1 67 0 \n", - "10 0 67 0 \n", - "11 1 67 0 \n", - "12 0 67 0 \n", - "13 1 67 0 \n", - "14 0 67 0 \n", - "15 1 67 0 \n", - "16 0 67 0 \n", - "17 1 67 0 \n", - "18 0 67 0 \n", - "19 1 67 0 \n", - "20 0 67 0 \n", - "21 1 67 0 \n", - "22 0 67 0 \n", - "23 1 67 0 \n", - "24 0 67 0 \n", - "25 1 67 0 \n", - "26 0 67 0 \n", - "27 1 67 0 \n", - "28 0 67 0 \n", - "29 1 67 0 \n", - "30 0 67 0 \n", - "31 1 67 0 \n", - "32 0 67 0 \n", - "33 1 67 0 \n", - "34 0 67 0 \n", - "35 1 67 0 \n", - "36 0 67 0 \n", - "37 1 67 0 \n", - "38 0 67 0 \n", - "39 1 67 0 \n", - "40 0 67 0 \n", - "41 1 67 0 \n", - "42 0 67 0 \n", - "43 1 67 0 \n", - "44 0 67 0 \n", - "45 1 67 0 \n", - "46 0 67 0 \n", - "47 1 67 0 \n", - "48 0 67 0 \n", - "49 1 67 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_1 tsk_27 32960 0.6592 [0, 0, 1] 68\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 22 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 25 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 22 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 24 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 25 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 23 0 0 0 0 \n", - "6 1 0 50000 
tsk_28 0 23 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 25 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 22 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 25 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 26 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 24 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 25 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 23 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 24 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 25 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 25 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 24 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 25 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 25 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 25 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 23 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 23 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 26 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 25 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 23 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 23 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 24 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 25 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 23 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 27 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 22 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 22 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 24 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 22 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 24 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 26 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 23 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 19 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 24 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 23 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 22 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 26 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 22 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 24 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 24 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 22 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 29 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 28 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 23 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.630863 3.235109 -1.292021 \\\n", - "1 ... 4.896290 3.426347 -1.498584 \n", - "2 ... 4.404547 1.959963 -2.392959 \n", - "3 ... 4.744967 1.285292 -2.308456 \n", - "4 ... 4.539060 3.996934 -0.949970 \n", - "5 ... 5.553898 0.874221 -4.177255 \n", - "6 ... 4.563246 1.976790 -2.069733 \n", - "7 ... 4.896290 3.426347 -1.498584 \n", - "8 ... 4.404547 1.959963 -2.392959 \n", - "9 ... 4.896290 3.426347 -1.498584 \n", - "10 ... 4.936254 3.393400 -1.856411 \n", - "11 ... 4.865754 2.884436 -1.318750 \n", - "12 ... 4.539060 3.996934 -0.949970 \n", - "13 ... 4.795574 2.682473 -1.160607 \n", - "14 ... 4.489410 3.685201 -0.847381 \n", - "15 ... 4.866006 0.761988 -3.340255 \n", - "16 ... 4.544227 3.990002 -0.959328 \n", - "17 ... 4.882667 2.639564 -1.870128 \n", - "18 ... 4.539060 3.996934 -0.949970 \n", - "19 ... 4.544227 3.990002 -0.959328 \n", - "20 ... 4.896290 3.426347 -1.498584 \n", - "21 ... 4.563246 1.976790 -2.069733 \n", - "22 ... 4.563246 1.976790 -2.069733 \n", - "23 ... 4.936254 3.393400 -1.856411 \n", - "24 ... 4.544227 3.990002 -0.959328 \n", - "25 ... 4.563246 1.976790 -2.069733 \n", - "26 ... 4.322886 4.192655 -0.888297 \n", - "27 ... 4.489410 3.685201 -0.847381 \n", - "28 ... 4.949755 1.539921 -1.217038 \n", - "29 ... 4.322886 4.192655 -0.888297 \n", - "30 ... 4.608422 1.362446 -1.365000 \n", - "31 ... 4.630863 3.235109 -1.292021 \n", - "32 ... 4.404547 1.959963 -2.392959 \n", - "33 ... 4.882667 2.639564 -1.870128 \n", - "34 ... 4.797165 0.907206 -2.517825 \n", - "35 ... 4.489410 3.685201 -0.847381 \n", - "36 ... 4.936254 3.393400 -1.856411 \n", - "37 ... 4.771811 2.869817 -1.228150 \n", - "38 ... 4.365364 1.023595 -2.181031 \n", - "39 ... 
4.865754 2.884436 -1.318750 \n", - "40 ... 5.212150 0.953496 -3.028339 \n", - "41 ... 4.630863 3.235109 -1.292021 \n", - "42 ... 4.936254 3.393400 -1.856411 \n", - "43 ... 4.473439 1.868345 -1.949073 \n", - "44 ... 4.716682 1.512908 -2.489662 \n", - "45 ... 4.716682 1.512908 -2.489662 \n", - "46 ... 4.404547 1.959963 -2.392959 \n", - "47 ... 5.779666 1.395461 -2.401653 \n", - "48 ... 5.196341 2.678039 -1.819140 \n", - "49 ... 4.322886 4.192655 -0.888297 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 0.580842 4.358899 0.0 2 \\\n", - "1 1.299737 4.690416 31346.0 5 \n", - "2 5.023294 4.123106 0.0 0 \n", - "3 6.336515 3.162278 0.0 2 \n", - "4 -0.418321 4.358899 3176.0 7 \n", - "5 22.064881 4.690416 0.0 8 \n", - "6 3.473166 1.414214 0.0 2 \n", - "7 1.299737 4.690416 31346.0 5 \n", - "8 5.023294 4.123106 0.0 0 \n", - "9 1.299737 4.690416 31346.0 5 \n", - "10 1.999173 2.000000 22018.0 3 \n", - "11 0.824659 4.582576 3924.0 4 \n", - "12 -0.418321 4.358899 3176.0 7 \n", - "13 0.232986 4.472136 0.0 3 \n", - "14 -0.709354 4.242641 3176.0 6 \n", - "15 16.603742 4.000000 179.0 3 \n", - "16 -0.399859 4.358899 10332.0 7 \n", - "17 2.203988 2.000000 22018.0 2 \n", - "18 -0.418321 4.358899 3176.0 7 \n", - "19 -0.399859 4.358899 10332.0 7 \n", - "20 1.299737 4.690416 31346.0 5 \n", - "21 3.473166 1.414214 0.0 2 \n", - "22 3.473166 1.414214 0.0 2 \n", - "23 1.999173 2.000000 22018.0 3 \n", - "24 -0.399859 4.358899 10332.0 7 \n", - "25 3.473166 1.414214 0.0 2 \n", - "26 -0.656812 4.123106 3176.0 5 \n", - "27 -0.709354 4.242641 3176.0 6 \n", - "28 2.761851 4.358899 179.0 6 \n", - "29 -0.656812 4.123106 3176.0 5 \n", - "30 2.867304 3.872983 0.0 1 \n", - "31 0.580842 4.358899 0.0 2 \n", - "32 5.023294 4.123106 0.0 0 \n", - "33 2.203988 2.000000 22018.0 2 \n", - "34 10.285909 3.605551 6101.0 3 \n", - "35 -0.709354 4.242641 3176.0 6 \n", - "36 1.999173 2.000000 22018.0 3 \n", - "37 0.405523 4.472136 0.0 3 \n", - "38 5.529698 2.000000 0.0 0 \n", - "39 0.824659 4.582576 3924.0 4 \n", - "40 13.615657 4.690416 1738.0 4 \n", - "41 0.580842 4.358899 0.0 2 \n", - "42 1.999173 2.000000 22018.0 3 \n", - "43 2.886556 1.000000 0.0 1 \n", - "44 6.860242 3.162278 1.0 2 \n", - "45 6.860242 3.162278 1.0 2 \n", - "46 5.023294 4.123106 0.0 0 \n", - "47 8.901255 4.242641 6101.0 7 \n", - "48 1.982009 2.449490 22018.0 5 \n", - "49 -0.656812 4.123106 3176.0 5 \n", - "\n", - " haplo rep label \n", - "0 0 68 0 \n", - "1 1 68 0 \n", - "2 0 68 0 \n", - "3 1 68 0 \n", - "4 0 68 0 \n", - "5 1 68 0 \n", - "6 0 68 0 \n", - "7 1 68 0 \n", - "8 0 68 0 \n", - "9 1 68 0 \n", - "10 0 68 0 \n", - "11 1 68 0 \n", - "12 0 68 0 \n", - "13 1 68 0 \n", - "14 0 68 0 \n", - "15 1 68 0 \n", - "16 0 68 0 \n", - "17 1 68 0 \n", - "18 0 68 0 \n", - "19 1 68 0 \n", - "20 0 68 0 \n", - "21 1 68 0 \n", - "22 0 68 0 \n", - "23 1 68 0 \n", - "24 0 68 0 \n", - "25 1 68 0 \n", - "26 0 68 0 \n", - "27 1 68 0 \n", - "28 0 68 0 \n", - "29 1 68 0 \n", - "30 0 68 0 \n", - "31 1 68 0 \n", - "32 0 68 0 \n", - "33 1 68 0 \n", - "34 0 68 0 \n", - "35 1 68 0 \n", - "36 0 68 0 \n", - "37 1 68 0 \n", - "38 0 68 0 \n", - "39 1 68 0 \n", - "40 0 68 0 \n", - "41 1 68 0 \n", - "42 0 68 0 \n", - "43 1 68 0 \n", - "44 0 68 0 \n", - "45 1 68 0 \n", - "46 0 68 0 \n", - "47 1 68 0 \n", - "48 0 68 0 \n", - "49 1 68 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_1 tsk_30 10408 0.20816 [0, 1, 0] 69\n", - "1 hap_1 tsk_39 30619 0.61238 [0, 0, 1] 
69\n", - "2 hap_1 tsk_40 30619 0.61238 [0, 0, 1] 69\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 28 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 30 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 31 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 33 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 37 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 30 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 29 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 40 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 28 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 37 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 28 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 30 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 28 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 28 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 31 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 33 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 38 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 34 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 31 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 29 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 40 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 42 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 35 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 34 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 33 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 27 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 37 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 32 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 36 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 28 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 36 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 27 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 28 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 37 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 33 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 28 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 37 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 30 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 28 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 30 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 36 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 37 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 31 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 30 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 34 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 36 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 40 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 29 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 29 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 36 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.051799 3.962926 -0.811332 \\\n", - "1 ... 4.377235 3.139816 -0.754780 \n", - "2 ... 5.220466 2.286731 -2.097976 \n", - "3 ... 5.208255 1.374085 -3.316734 \n", - "4 ... 4.718510 4.235662 -1.171863 \n", - "5 ... 4.288245 3.710955 -0.860386 \n", - "6 ... 4.216193 3.443718 -0.720296 \n", - "7 ... 6.121203 3.310873 -2.686935 \n", - "8 ... 5.051037 3.427028 -1.682362 \n", - "9 ... 4.746794 4.007943 -1.143599 \n", - "10 ... 5.051037 3.427028 -1.682362 \n", - "11 ... 5.163033 1.483086 -1.230941 \n", - "12 ... 4.051799 3.962926 -0.811332 \n", - "13 ... 4.332616 2.448438 -0.776150 \n", - "14 ... 5.220466 2.286731 -2.097976 \n", - "15 ... 5.208255 1.374085 -3.316734 \n", - "16 ... 6.023447 1.778089 -2.771087 \n", - "17 ... 4.804443 1.737330 -1.106072 \n", - "18 ... 5.220466 2.286731 -2.097976 \n", - "19 ... 5.158038 3.254644 -1.680633 \n", - "20 ... 6.121203 3.310873 -2.686935 \n", - "21 ... 6.317458 2.789726 -2.668191 \n", - "22 ... 4.654416 3.596410 -1.053713 \n", - "23 ... 5.363833 1.849296 -2.313165 \n", - "24 ... 4.724011 2.703717 -0.816024 \n", - "25 ... 5.027477 2.864475 -1.539794 \n", - "26 ... 4.746794 4.007943 -1.143599 \n", - "27 ... 4.542046 2.869822 -0.759313 \n", - "28 ... 4.613077 4.339523 -1.143525 \n", - "29 ... 
5.352786 1.847680 -1.811522 \n", - "30 ... 4.613077 4.339523 -1.143525 \n", - "31 ... 5.262563 1.845431 -1.723405 \n", - "32 ... 4.051799 3.962926 -0.811332 \n", - "33 ... 4.718510 4.235662 -1.171863 \n", - "34 ... 4.703034 1.741474 -0.995803 \n", - "35 ... 4.051799 3.962926 -0.811332 \n", - "36 ... 4.718510 4.235662 -1.171863 \n", - "37 ... 4.288245 3.710955 -0.860386 \n", - "38 ... 5.051037 3.427028 -1.682362 \n", - "39 ... 4.288245 3.710955 -0.860386 \n", - "40 ... 4.613077 4.339523 -1.143525 \n", - "41 ... 4.775079 3.778623 -1.102475 \n", - "42 ... 4.407863 2.830741 -0.703709 \n", - "43 ... 4.550160 2.316043 -0.860347 \n", - "44 ... 5.363833 1.849296 -2.313165 \n", - "45 ... 6.461728 2.146074 -3.345578 \n", - "46 ... 6.121203 3.310873 -2.686935 \n", - "47 ... 5.158038 3.254644 -1.680633 \n", - "48 ... 4.235286 3.402350 -0.750517 \n", - "49 ... 6.461728 2.146074 -3.345578 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.761318 2.828427 0.0 0 \\\n", - "1 -0.772882 3.162278 0.0 2 \n", - "2 6.035310 3.872983 1444.0 3 \n", - "3 12.815718 4.690416 4685.0 4 \n", - "4 0.046761 4.898979 2504.0 8 \n", - "5 -0.577572 3.162278 1.0 2 \n", - "6 -0.919302 3.000000 0.0 1 \n", - "7 5.831622 2.449490 29215.0 4 \n", - "8 1.889917 1.732051 1.0 3 \n", - "9 -0.002507 4.898979 2504.0 8 \n", - "10 1.889917 1.732051 1.0 3 \n", - "11 4.481860 3.605551 2261.0 4 \n", - "12 -0.761318 2.828427 0.0 0 \n", - "13 -0.415131 3.464102 1.0 2 \n", - "14 6.035310 3.872983 1444.0 3 \n", - "15 12.815718 4.690416 4685.0 4 \n", - "16 7.801430 3.464102 29214.0 5 \n", - "17 2.484022 4.690416 1444.0 5 \n", - "18 6.035310 3.872983 1444.0 3 \n", - "19 1.930449 2.000000 1.0 4 \n", - "20 5.831622 2.449490 29215.0 4 \n", - "21 5.846093 2.828427 29215.0 6 \n", - "22 -0.154787 4.898979 2504.0 7 \n", - "23 7.036544 5.099020 41419.0 11 \n", - "24 -0.388077 3.000000 0.0 3 \n", - "25 1.563170 1.414214 0.0 2 \n", - "26 -0.002507 4.898979 2504.0 8 \n", - "27 -0.589327 3.162278 0.0 3 \n", - "28 -0.069465 4.795832 2504.0 7 \n", - "29 4.462715 3.605551 13809.0 7 \n", - "30 -0.069465 4.795832 2504.0 7 \n", - "31 4.037773 3.464102 13809.0 6 \n", - "32 -0.761318 2.828427 0.0 0 \n", - "33 0.046761 4.898979 2504.0 8 \n", - "34 2.067768 4.582576 1444.0 4 \n", - "35 -0.761318 2.828427 0.0 0 \n", - "36 0.046761 4.898979 2504.0 8 \n", - "37 -0.577572 3.162278 1.0 2 \n", - "38 1.889917 1.732051 1.0 3 \n", - "39 -0.577572 3.162278 1.0 2 \n", - "40 -0.069465 4.795832 2504.0 7 \n", - "41 -0.103122 4.898979 2504.0 8 \n", - "42 -0.631237 3.000000 0.0 2 \n", - "43 -0.007647 3.741657 1.0 4 \n", - "44 7.036544 5.099020 41419.0 11 \n", - "45 12.226682 6.324555 19746.0 16 \n", - "46 5.831622 2.449490 29215.0 4 \n", - "47 1.930449 2.000000 1.0 4 \n", - "48 -0.854698 3.000000 0.0 1 \n", - "49 12.226682 6.324555 19746.0 16 \n", - "\n", - " haplo rep label \n", - "0 0 69 0 \n", - "1 1 69 0 \n", - "2 0 69 0 \n", - "3 1 69 0 \n", - "4 0 69 0 \n", - "5 1 69 0 \n", - "6 0 69 0 \n", - "7 1 69 0 \n", - "8 0 69 0 \n", - "9 1 69 0 \n", - "10 0 69 0 \n", - "11 1 69 0 \n", - "12 0 69 0 \n", - "13 1 69 0 \n", - "14 0 69 0 \n", - "15 1 69 0 \n", - "16 0 69 0 \n", - "17 1 69 0 \n", - "18 0 69 0 \n", - "19 1 69 0 \n", - "20 0 69 0 \n", - "21 1 69 0 \n", - "22 0 69 0 \n", - "23 1 69 0 \n", - "24 0 69 0 \n", - "25 1 69 0 \n", - "26 0 69 0 \n", - "27 1 69 0 \n", - "28 0 69 0 \n", - "29 1 69 0 \n", - "30 0 69 0 \n", - "31 1 69 0 \n", - "32 0 69 0 \n", - "33 1 69 0 \n", - "34 0 69 0 \n", - "35 1 69 0 \n", - "36 0 69 0 \n", - "37 1 69 0 \n", - 
"38 0 69 0 \n", - "39 1 69 0 \n", - "40 0 69 0 \n", - "41 1 69 0 \n", - "42 0 69 0 \n", - "43 1 69 0 \n", - "44 0 69 0 \n", - "45 1 69 0 \n", - "46 0 69 0 \n", - "47 1 69 0 \n", - "48 0 69 0 \n", - "49 1 69 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_26 9325 0.18650 [0, 1, 0] 73\n", - "1 hap_0 tsk_28 26984 0.53968 [0, 0, 1] 73\n", - "2 hap_0 tsk_38 9325 0.18650 [0, 1, 0] 73\n", - "3 hap_0 tsk_46 9325 0.18650 [0, 1, 0] 73\n", - "4 hap_1 tsk_26 9325 0.18650 [0, 1, 0] 73\n", - "5 hap_1 tsk_35 9325 0.18650 [0, 1, 0] 73\n", - "6 hap_1 tsk_38 26984 0.53968 [0, 0, 1] 73\n", - "7 hap_1 tsk_39 26984 0.53968 [0, 0, 1] 73\n", - "8 hap_1 tsk_49 26984 0.53968 [0, 0, 1] 73\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 17 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 15 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 23 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 21 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 16 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 16 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 19 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 16 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 14 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 19 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 14 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 19 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 19 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 15 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 19 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 16 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 17 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 17 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 17 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 17 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 19 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 21 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 15 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 16 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 17 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 16 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 23 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 19 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 19 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 19 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 16 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 14 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 16 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 16 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 19 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 17 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 16 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 17 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 19 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 16 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 17 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 20 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 20 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 21 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 18 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 15 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 16 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 21 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 19 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.051403 1.106134 -2.334026 \\\n", - "1 ... 4.387496 2.109879 -1.858384 \n", - "2 ... 4.624431 1.334634 -2.693100 \n", - "3 ... 4.439501 1.210828 -2.806158 \n", - "4 ... 3.913919 1.241241 -2.351908 \n", - "5 ... 4.010679 1.354458 -2.340306 \n", - "6 ... 4.441577 1.832398 -2.756050 \n", - "7 ... 4.519870 1.890778 -1.839803 \n", - "8 ... 4.058413 0.689281 -2.388315 \n", - "9 ... 4.195306 2.599408 -1.722039 \n", - "10 ... 4.297138 2.014602 -1.806216 \n", - "11 ... 
4.195306 2.599408 -1.722039 \n", - "12 ... 4.195306 2.599408 -1.722039 \n", - "13 ... 4.199569 0.763620 -2.092275 \n", - "14 ... 4.317174 1.802007 -1.524282 \n", - "15 ... 4.135201 0.860111 -3.452211 \n", - "16 ... 4.796448 1.714087 -2.084116 \n", - "17 ... 4.178524 1.739940 -1.815356 \n", - "18 ... 4.796448 1.714087 -2.084116 \n", - "19 ... 4.659960 0.604769 -4.267501 \n", - "20 ... 4.341475 0.991594 -3.228931 \n", - "21 ... 4.439501 1.210828 -2.806158 \n", - "22 ... 4.517906 1.468528 -1.911056 \n", - "23 ... 4.394441 0.968888 -2.613191 \n", - "24 ... 4.051403 1.106134 -2.334026 \n", - "25 ... 4.135201 0.860111 -3.452211 \n", - "26 ... 4.624431 1.334634 -2.693100 \n", - "27 ... 4.441577 1.832398 -2.756050 \n", - "28 ... 4.195306 2.599408 -1.722039 \n", - "29 ... 4.441577 1.832398 -2.756050 \n", - "30 ... 4.010679 1.354458 -2.340306 \n", - "31 ... 4.297138 2.014602 -1.806216 \n", - "32 ... 4.010679 1.354458 -2.340306 \n", - "33 ... 4.068458 1.727648 -1.740818 \n", - "34 ... 4.341475 0.991594 -3.228931 \n", - "35 ... 4.206808 1.542767 -1.701596 \n", - "36 ... 4.068458 1.727648 -1.740818 \n", - "37 ... 4.178524 1.739940 -1.815356 \n", - "38 ... 4.195306 2.599408 -1.722039 \n", - "39 ... 4.201870 1.144287 -1.474909 \n", - "40 ... 4.585736 0.571024 -4.413187 \n", - "41 ... 4.500426 0.986167 -2.737995 \n", - "42 ... 4.354930 0.954581 -2.241641 \n", - "43 ... 4.481535 1.195847 -2.101133 \n", - "44 ... 4.586706 1.202129 -2.220426 \n", - "45 ... 4.724016 0.803672 -2.970172 \n", - "46 ... 4.387496 2.109879 -1.858384 \n", - "47 ... 3.913919 1.241241 -2.351908 \n", - "48 ... 4.491637 0.745200 -3.002563 \n", - "49 ... 4.441577 1.832398 -2.756050 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 5.183223 2.000000 0.0 1 \\\n", - "1 2.367502 4.358899 665.0 7 \n", - "2 7.793352 4.000000 12103.0 7 \n", - "3 8.183533 4.000000 12103.0 6 \n", - "4 5.105878 1.732051 0.0 0 \n", - "5 5.653561 3.605551 0.0 3 \n", - "6 6.311005 4.472136 18505.0 7 \n", - "7 2.371862 4.472136 665.0 8 \n", - "8 9.091745 3.316625 0.0 1 \n", - "9 1.922651 2.828427 0.0 2 \n", - "10 2.214075 4.242641 665.0 6 \n", - "11 1.922651 2.828427 0.0 2 \n", - "12 1.922651 2.828427 0.0 2 \n", - "13 8.191941 3.162278 0.0 5 \n", - "14 1.589782 2.828427 0.0 2 \n", - "15 12.918970 3.464102 0.0 3 \n", - "16 4.643007 5.000000 1117.0 9 \n", - "17 2.941589 2.645751 0.0 3 \n", - "18 4.643007 5.000000 1117.0 9 \n", - "19 22.984221 4.000000 0.0 8 \n", - "20 11.569453 2.828427 14689.0 2 \n", - "21 8.183533 4.000000 12103.0 6 \n", - "22 3.112990 4.358899 665.0 8 \n", - "23 8.064036 4.000000 4521.0 6 \n", - "24 5.183223 2.000000 0.0 1 \n", - "25 12.918970 3.464102 0.0 3 \n", - "26 7.793352 4.000000 12103.0 7 \n", - "27 6.311005 4.472136 18505.0 7 \n", - "28 1.922651 2.828427 0.0 2 \n", - "29 6.311005 4.472136 18505.0 7 \n", - "30 5.653561 3.605551 0.0 3 \n", - "31 2.214075 4.242641 665.0 6 \n", - "32 5.653561 3.605551 0.0 3 \n", - "33 2.645891 2.449490 0.0 2 \n", - "34 11.569453 2.828427 14689.0 2 \n", - "35 2.638246 2.645751 0.0 3 \n", - "36 2.645891 2.449490 0.0 2 \n", - "37 2.941589 2.645751 0.0 3 \n", - "38 1.922651 2.828427 0.0 2 \n", - "39 2.926733 2.000000 0.0 1 \n", - "40 24.307800 3.741657 665.0 4 \n", - "41 8.773142 4.123106 4521.0 7 \n", - "42 6.336419 4.000000 12103.0 5 \n", - "43 5.235192 4.000000 27720.0 5 \n", - "44 5.885335 4.123106 27720.0 6 \n", - "45 12.866118 4.123106 1117.0 6 \n", - "46 2.367502 4.358899 665.0 7 \n", - "47 5.105878 1.732051 0.0 0 \n", - "48 12.405098 3.162278 1116.0 5 \n", - "49 
6.311005 4.472136 18505.0 7 \n", - "\n", - " haplo rep label \n", - "0 0 73 0 \n", - "1 1 73 0 \n", - "2 0 73 0 \n", - "3 1 73 0 \n", - "4 0 73 0 \n", - "5 1 73 0 \n", - "6 0 73 0 \n", - "7 1 73 0 \n", - "8 0 73 0 \n", - "9 1 73 0 \n", - "10 0 73 0 \n", - "11 1 73 0 \n", - "12 0 73 0 \n", - "13 1 73 0 \n", - "14 0 73 0 \n", - "15 1 73 0 \n", - "16 0 73 0 \n", - "17 1 73 0 \n", - "18 0 73 0 \n", - "19 1 73 0 \n", - "20 0 73 0 \n", - "21 1 73 0 \n", - "22 0 73 0 \n", - "23 1 73 0 \n", - "24 0 73 0 \n", - "25 1 73 0 \n", - "26 0 73 0 \n", - "27 1 73 0 \n", - "28 0 73 0 \n", - "29 1 73 0 \n", - "30 0 73 0 \n", - "31 1 73 0 \n", - "32 0 73 0 \n", - "33 1 73 0 \n", - "34 0 73 0 \n", - "35 1 73 0 \n", - "36 0 73 0 \n", - "37 1 73 0 \n", - "38 0 73 0 \n", - "39 1 73 0 \n", - "40 0 73 0 \n", - "41 1 73 0 \n", - "42 0 73 0 \n", - "43 1 73 0 \n", - "44 0 73 0 \n", - "45 1 73 0 \n", - "46 0 73 0 \n", - "47 1 73 0 \n", - "48 0 73 0 \n", - "49 1 73 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_27 50000 1.00000 [1, 0, 0] 76\n", - "1 hap_0 tsk_28 50000 1.00000 [1, 0, 0] 76\n", - "2 hap_0 tsk_29 50000 1.00000 [1, 0, 0] 76\n", - "3 hap_0 tsk_31 49884 0.99768 [1, 0, 0] 76\n", - "4 hap_0 tsk_33 49884 0.99768 [1, 0, 0] 76\n", - "5 hap_0 tsk_38 49884 0.99768 [1, 0, 0] 76\n", - "6 hap_0 tsk_39 50000 1.00000 [1, 0, 0] 76\n", - "7 hap_0 tsk_41 49884 0.99768 [1, 0, 0] 76\n", - "8 hap_0 tsk_43 50000 1.00000 [1, 0, 0] 76\n", - "9 hap_1 tsk_26 50000 1.00000 [1, 0, 0] 76\n", - "10 hap_1 tsk_32 49884 0.99768 [1, 0, 0] 76\n", - "11 hap_1 tsk_33 49884 0.99768 [1, 0, 0] 76\n", - "12 hap_1 tsk_35 49884 0.99768 [1, 0, 0] 76\n", - "13 hap_1 tsk_39 49884 0.99768 [1, 0, 0] 76\n", - "14 hap_1 tsk_41 49884 0.99768 [1, 0, 0] 76\n", - "15 hap_1 tsk_43 49884 0.99768 [1, 0, 0] 76\n", - "16 hap_1 tsk_49 50000 1.00000 [1, 0, 0] 76\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 29 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 31 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 33 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 25 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 27 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 31 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 27 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 33 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 26 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 30 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 31 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 31 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 25 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 31 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 32 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 26 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 32 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 32 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 31 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 31 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 30 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 26 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 33 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 30 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 35 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 32 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 25 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 31 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 26 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 25 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 32 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 
30 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 25 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 25 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 33 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 31 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 27 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 28 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 35 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 32 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 28 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 30 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 32 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 32 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 31 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 30 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 32 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 30 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 33 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 27 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.848488 4.172162 -1.073449 \\\n", - "1 ... 5.071782 3.117032 -1.912442 \n", - "2 ... 5.541077 1.576467 -3.377027 \n", - "3 ... 4.768386 3.262495 -1.181164 \n", - "4 ... 4.882472 3.841463 -1.384681 \n", - "5 ... 5.071782 3.117032 -1.912442 \n", - "6 ... 4.882472 3.841463 -1.384681 \n", - "7 ... 5.902147 1.244658 -3.145287 \n", - "8 ... 4.888610 3.061496 -1.191818 \n", - "9 ... 4.854503 4.793805 -1.138777 \n", - "10 ... 5.079091 3.762838 -1.108153 \n", - "11 ... 5.333143 2.557585 -1.638134 \n", - "12 ... 4.762805 1.835692 -2.499156 \n", - "13 ... 4.909784 4.774024 -1.116458 \n", - "14 ... 5.368152 2.382939 -1.755860 \n", - "15 ... 5.497064 1.182289 -2.409958 \n", - "16 ... 5.244032 2.020131 -2.150155 \n", - "17 ... 5.244032 2.020131 -2.150155 \n", - "18 ... 5.071782 3.117032 -1.912442 \n", - "19 ... 5.071782 3.117032 -1.912442 \n", - "20 ... 5.921113 1.500415 -2.285613 \n", - "21 ... 4.888610 3.061496 -1.191818 \n", - "22 ... 5.036262 1.436066 -2.350924 \n", - "23 ... 4.899404 1.515837 -2.117000 \n", - "24 ... 5.184352 1.762494 -2.656164 \n", - "25 ... 4.996548 4.794506 -1.166423 \n", - "26 ... 4.762805 1.835692 -2.499156 \n", - "27 ... 5.762465 2.314000 -2.127694 \n", - "28 ... 4.888610 3.061496 -1.191818 \n", - "29 ... 4.762805 1.835692 -2.499156 \n", - "30 ... 5.082979 1.523321 -2.204834 \n", - "31 ... 5.897777 1.056224 -3.788113 \n", - "32 ... 5.346028 1.979983 -1.988682 \n", - "33 ... 5.346028 1.979983 -1.988682 \n", - "34 ... 6.046126 1.364366 -2.938749 \n", - "35 ... 4.909784 4.774024 -1.116458 \n", - "36 ... 4.882472 3.841463 -1.384681 \n", - "37 ... 5.010083 1.619065 -1.612486 \n", - "38 ... 5.184352 1.762494 -2.656164 \n", - "39 ... 4.996548 4.794506 -1.166423 \n", - "40 ... 5.252777 0.928331 -2.949487 \n", - "41 ... 4.960859 3.989875 -1.091063 \n", - "42 ... 5.194908 0.972932 -2.883187 \n", - "43 ... 4.996548 4.794506 -1.166423 \n", - "44 ... 5.762465 2.314000 -2.127694 \n", - "45 ... 4.854503 4.793805 -1.138777 \n", - "46 ... 5.094082 1.490333 -2.265505 \n", - "47 ... 4.854503 4.793805 -1.138777 \n", - "48 ... 5.541077 1.576467 -3.377027 \n", - "49 ... 
4.882472 3.841463 -1.384681 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.368567 1.414214 0.0 0 \\\n", - "1 2.904175 4.000000 3961.0 5 \n", - "2 12.302020 4.358899 10784.0 8 \n", - "3 0.299520 4.123106 7920.0 8 \n", - "4 0.865516 4.358899 31399.0 10 \n", - "5 2.904175 4.000000 3961.0 5 \n", - "6 0.865516 4.358899 31399.0 10 \n", - "7 13.461368 4.898979 8834.0 9 \n", - "8 0.421726 4.242641 7920.0 9 \n", - "9 -0.238620 1.000000 0.0 0 \n", - "10 -0.165473 2.000000 0.0 2 \n", - "11 2.274295 4.123106 9899.0 7 \n", - "12 6.315563 4.582576 7920.0 4 \n", - "13 -0.295447 1.414214 0.0 1 \n", - "14 2.750008 4.242641 9899.0 7 \n", - "15 10.689516 3.605551 3961.0 6 \n", - "16 5.276442 3.316625 37771.0 7 \n", - "17 5.276442 3.316625 37771.0 7 \n", - "18 2.904175 4.000000 3961.0 5 \n", - "19 2.904175 4.000000 3961.0 5 \n", - "20 8.537077 5.291503 8834.0 12 \n", - "21 0.421726 4.242641 7920.0 9 \n", - "22 6.885729 3.872983 5445.0 3 \n", - "23 4.951481 2.828427 0.0 2 \n", - "24 7.808570 4.123106 13053.0 5 \n", - "25 -0.114961 1.732051 0.0 2 \n", - "26 6.315563 4.582576 7920.0 4 \n", - "27 5.479058 4.690416 10784.0 9 \n", - "28 0.421726 4.242641 7920.0 9 \n", - "29 6.315563 4.582576 7920.0 4 \n", - "30 5.465368 3.162278 0.0 4 \n", - "31 19.032514 4.795832 8834.0 13 \n", - "32 5.676807 4.242641 7920.0 7 \n", - "33 5.676807 4.242641 7920.0 7 \n", - "34 12.159375 4.358899 8834.0 11 \n", - "35 -0.295447 1.414214 0.0 1 \n", - "36 0.865516 4.358899 31399.0 10 \n", - "37 3.575065 2.645751 6109.0 4 \n", - "38 7.808570 4.123106 13053.0 5 \n", - "39 -0.114961 1.732051 0.0 2 \n", - "40 15.154152 3.316625 7766.0 4 \n", - "41 -0.277475 1.732051 0.0 1 \n", - "42 12.861300 2.000000 0.0 2 \n", - "43 -0.114961 1.732051 0.0 2 \n", - "44 5.479058 4.690416 10784.0 9 \n", - "45 -0.238620 1.000000 0.0 0 \n", - "46 5.860789 3.162278 0.0 4 \n", - "47 -0.238620 1.000000 0.0 0 \n", - "48 12.302020 4.358899 10784.0 8 \n", - "49 0.865516 4.358899 31399.0 10 \n", - "\n", - " haplo rep label \n", - "0 0 76 0 \n", - "1 1 76 0 \n", - "2 0 76 0 \n", - "3 1 76 0 \n", - "4 0 76 0 \n", - "5 1 76 0 \n", - "6 0 76 0 \n", - "7 1 76 0 \n", - "8 0 76 0 \n", - "9 1 76 0 \n", - "10 0 76 0 \n", - "11 1 76 0 \n", - "12 0 76 0 \n", - "13 1 76 0 \n", - "14 0 76 0 \n", - "15 1 76 0 \n", - "16 0 76 0 \n", - "17 1 76 0 \n", - "18 0 76 0 \n", - "19 1 76 0 \n", - "20 0 76 0 \n", - "21 1 76 0 \n", - "22 0 76 0 \n", - "23 1 76 0 \n", - "24 0 76 0 \n", - "25 1 76 0 \n", - "26 0 76 0 \n", - "27 1 76 0 \n", - "28 0 76 0 \n", - "29 1 76 0 \n", - "30 0 76 0 \n", - "31 1 76 0 \n", - "32 0 76 0 \n", - "33 1 76 0 \n", - "34 0 76 0 \n", - "35 1 76 0 \n", - "36 0 76 0 \n", - "37 1 76 0 \n", - "38 0 76 0 \n", - "39 1 76 0 \n", - "40 0 76 0 \n", - "41 1 76 0 \n", - "42 0 76 0 \n", - "43 1 76 0 \n", - "44 0 76 0 \n", - "45 1 76 0 \n", - "46 0 76 0 \n", - "47 1 76 0 \n", - "48 0 76 0 \n", - "49 1 76 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - 
"einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_26 17345 0.3469 [0, 0, 1] 79\n", - "1 hap_0 tsk_31 17345 0.3469 [0, 0, 1] 79\n", - "2 hap_0 tsk_39 17345 0.3469 [0, 0, 1] 79\n", - "3 hap_1 tsk_38 17345 0.3469 [0, 0, 1] 79\n", - "4 hap_1 tsk_42 17345 0.3469 [0, 0, 1] 79\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 28 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 33 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 29 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 28 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 38 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 33 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 28 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 25 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 29 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 33 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 30 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 31 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 29 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 33 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 31 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 32 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 25 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 30 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 34 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 28 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 27 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 33 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 32 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 32 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 33 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 25 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 27 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 29 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 30 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 32 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 31 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 29 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 30 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 33 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 29 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 29 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 34 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 30 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 37 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 31 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 28 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 29 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 27 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 24 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 24 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 29 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 32 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 31 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 37 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 32 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.765776 2.787382 -0.272467 \\\n", - "1 ... 5.146274 3.895859 -2.019593 \n", - "2 ... 4.525512 3.179739 -0.679732 \n", - "3 ... 5.083466 2.378370 -1.015284 \n", - "4 ... 6.358884 2.504595 -1.732069 \n", - "5 ... 5.146274 3.895859 -2.019593 \n", - "6 ... 4.877003 2.354840 -1.615233 \n", - "7 ... 4.681335 1.945103 -0.492420 \n", - "8 ... 5.180630 2.341070 -1.073873 \n", - "9 ... 5.572320 2.649249 -1.553851 \n", - "10 ... 4.633992 2.826119 -0.601378 \n", - "11 ... 4.915560 3.417271 -0.572684 \n", - "12 ... 4.525512 3.179739 -0.679732 \n", - "13 ... 5.146274 3.895859 -2.019593 \n", - "14 ... 5.526491 2.917896 -1.804365 \n", - "15 ... 4.781826 3.834139 -0.710852 \n", - "16 ... 4.681335 1.945103 -0.492420 \n", - "17 ... 5.277863 2.284160 -1.104422 \n", - "18 ... 5.467068 1.211169 -2.334907 \n", - "19 ... 
4.877003 2.354840 -1.615233 \n", - "20 ... 4.372447 3.061703 -0.626102 \n", - "21 ... 5.146274 3.895859 -2.019593 \n", - "22 ... 4.781826 3.834139 -0.710852 \n", - "23 ... 5.729443 2.873482 -1.471212 \n", - "24 ... 5.146274 3.895859 -2.019593 \n", - "25 ... 4.743944 1.554996 -1.961739 \n", - "26 ... 4.789918 2.036686 -0.447990 \n", - "27 ... 4.556915 2.574530 -1.043218 \n", - "28 ... 4.681429 2.384220 -0.995495 \n", - "29 ... 4.995525 3.544728 -0.729755 \n", - "30 ... 5.641494 2.913550 -1.418745 \n", - "31 ... 4.525390 3.220846 -0.719927 \n", - "32 ... 4.903378 3.296889 -0.784176 \n", - "33 ... 5.146274 3.895859 -2.019593 \n", - "34 ... 5.423455 2.486136 -1.665505 \n", - "35 ... 4.556915 2.574530 -1.043218 \n", - "36 ... 6.351353 1.600309 -2.894716 \n", - "37 ... 4.903378 3.296889 -0.784176 \n", - "38 ... 6.028641 2.595490 -2.203082 \n", - "39 ... 4.966590 2.632982 -0.680419 \n", - "40 ... 4.877003 2.354840 -1.615233 \n", - "41 ... 4.525390 3.220846 -0.719927 \n", - "42 ... 4.372447 3.061703 -0.626102 \n", - "43 ... 4.626957 1.691273 -2.022328 \n", - "44 ... 4.626957 1.691273 -2.022328 \n", - "45 ... 4.641870 2.753040 -0.434464 \n", - "46 ... 4.995525 3.544728 -0.729755 \n", - "47 ... 5.526491 2.917896 -1.804365 \n", - "48 ... 6.028641 2.595490 -2.203082 \n", - "49 ... 4.781826 3.834139 -0.710852 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.323384 1.732051 0.0 1 \\\n", - "1 2.615072 5.000000 10684.0 8 \n", - "2 -0.181712 4.472136 15903.0 3 \n", - "3 1.608242 3.872983 0.0 1 \n", - "4 3.386029 3.741657 19459.0 3 \n", - "5 2.615072 5.000000 10684.0 8 \n", - "6 3.522619 5.000000 3592.0 5 \n", - "7 1.200073 3.464102 0.0 1 \n", - "8 1.862922 4.000000 0.0 2 \n", - "9 1.828437 3.000000 0.0 1 \n", - "10 -0.249761 4.358899 0.0 3 \n", - "11 -0.179998 1.414214 4305.0 2 \n", - "12 -0.181712 4.472136 15903.0 3 \n", - "13 2.615072 5.000000 10684.0 8 \n", - "14 2.799536 3.316625 0.0 1 \n", - "15 0.038355 1.732051 31281.0 2 \n", - "16 1.200073 3.464102 0.0 1 \n", - "17 2.029174 4.123106 0.0 3 \n", - "18 9.608050 4.242641 570.0 4 \n", - "19 3.522619 5.000000 3592.0 5 \n", - "20 -0.187140 4.000000 0.0 0 \n", - "21 2.615072 5.000000 10684.0 8 \n", - "22 0.038355 1.732051 31281.0 2 \n", - "23 1.717789 2.449490 0.0 2 \n", - "24 2.615072 5.000000 10684.0 8 \n", - "25 4.773574 4.690416 0.0 3 \n", - "26 0.751789 4.242641 0.0 2 \n", - "27 0.964649 4.000000 15903.0 3 \n", - "28 0.890166 4.123106 15903.0 4 \n", - "29 0.251415 1.732051 4306.0 3 \n", - "30 1.451448 2.236068 0.0 1 \n", - "31 -0.105077 4.242641 0.0 2 \n", - "32 0.328171 2.645751 31281.0 3 \n", - "33 2.615072 5.000000 10684.0 8 \n", - "34 2.364613 3.316625 0.0 0 \n", - "35 0.964649 4.000000 15903.0 3 \n", - "36 10.820483 4.795832 19459.0 3 \n", - "37 0.328171 2.645751 31281.0 3 \n", - "38 5.261181 4.123106 2392.0 7 \n", - "39 0.384486 3.162278 31281.0 3 \n", - "40 3.522619 5.000000 3592.0 5 \n", - "41 -0.105077 4.242641 0.0 2 \n", - "42 -0.187140 4.000000 0.0 0 \n", - "43 4.929857 4.582576 0.0 2 \n", - "44 4.929857 4.582576 0.0 2 \n", - "45 -0.282096 4.242641 0.0 2 \n", - "46 0.251415 1.732051 4306.0 3 \n", - "47 2.799536 3.316625 0.0 1 \n", - "48 5.261181 4.123106 2392.0 7 \n", - "49 0.038355 1.732051 31281.0 2 \n", - "\n", - " haplo rep label \n", - "0 0 79 0 \n", - "1 1 79 0 \n", - "2 0 79 0 \n", - "3 1 79 0 \n", - "4 0 79 0 \n", - "5 1 79 0 \n", - "6 0 79 0 \n", - "7 1 79 0 \n", - "8 0 79 0 \n", - "9 1 79 0 \n", - "10 0 79 0 \n", - "11 1 79 0 \n", - "12 0 79 0 \n", - "13 1 79 0 \n", - "14 0 79 0 \n", - 
"15 1 79 0 \n", - "16 0 79 0 \n", - "17 1 79 0 \n", - "18 0 79 0 \n", - "19 1 79 0 \n", - "20 0 79 0 \n", - "21 1 79 0 \n", - "22 0 79 0 \n", - "23 1 79 0 \n", - "24 0 79 0 \n", - "25 1 79 0 \n", - "26 0 79 0 \n", - "27 1 79 0 \n", - "28 0 79 0 \n", - "29 1 79 0 \n", - "30 0 79 0 \n", - "31 1 79 0 \n", - "32 0 79 0 \n", - "33 1 79 0 \n", - "34 0 79 0 \n", - "35 1 79 0 \n", - "36 0 79 0 \n", - "37 1 79 0 \n", - "38 0 79 0 \n", - "39 1 79 0 \n", - "40 0 79 0 \n", - "41 1 79 0 \n", - "42 0 79 0 \n", - "43 1 79 0 \n", - "44 0 79 0 \n", - "45 1 79 0 \n", - "46 0 79 0 \n", - "47 1 79 0 \n", - "48 0 79 0 \n", - "49 1 79 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_29 50000 1.0 [1, 0, 0] 80\n", - "1 hap_0 tsk_32 50000 1.0 [1, 0, 0] 80\n", - "2 hap_0 tsk_37 50000 1.0 [1, 0, 0] 80\n", - "3 hap_0 tsk_48 50000 1.0 [1, 0, 0] 80\n", - "4 hap_1 tsk_40 50000 1.0 [1, 0, 0] 80\n", - "5 hap_1 tsk_48 50000 1.0 [1, 0, 0] 80\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 25 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 24 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 33 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 25 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 22 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 24 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 23 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 32 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 31 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 25 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 23 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 23 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 24 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 22 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 32 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 24 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 25 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 24 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 25 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 29 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 29 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 25 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 22 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 23 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 31 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 24 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 24 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 24 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 22 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 24 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 27 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 31 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 24 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 24 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 22 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 22 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 25 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 24 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 24 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 25 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 24 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 28 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 24 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 29 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 24 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 27 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 25 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 31 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 24 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 24 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.951944 3.058255 -0.981340 \\\n", - "1 ... 4.374480 3.763926 -0.973256 \n", - "2 ... 5.862819 1.847355 -2.317289 \n", - "3 ... 
4.845897 3.017280 -0.938289 \n", - "4 ... 4.548270 3.893239 -1.249116 \n", - "5 ... 4.770040 3.986715 -1.209414 \n", - "6 ... 4.720913 3.252985 -1.146721 \n", - "7 ... 5.781919 1.829407 -2.236876 \n", - "8 ... 5.653243 3.660844 -1.873861 \n", - "9 ... 5.107307 3.495415 -1.204646 \n", - "10 ... 4.318131 3.413745 -0.812878 \n", - "11 ... 4.826835 1.841668 -1.142760 \n", - "12 ... 4.770040 3.986715 -1.209414 \n", - "13 ... 4.548270 3.893239 -1.249116 \n", - "14 ... 5.772773 3.255094 -1.799068 \n", - "15 ... 4.958574 4.032540 -1.300899 \n", - "16 ... 5.409283 1.719653 -1.096152 \n", - "17 ... 4.459333 3.134352 -0.800021 \n", - "18 ... 4.889775 3.750096 -1.208638 \n", - "19 ... 5.938041 2.959671 -1.956501 \n", - "20 ... 5.938041 2.959671 -1.956501 \n", - "21 ... 4.889775 3.750096 -1.208638 \n", - "22 ... 4.548270 3.893239 -1.249116 \n", - "23 ... 4.826835 1.841668 -1.142760 \n", - "24 ... 5.724453 2.970643 -1.713061 \n", - "25 ... 4.374480 3.763926 -0.973256 \n", - "26 ... 5.206108 2.316436 -1.040351 \n", - "27 ... 4.374480 3.763926 -0.973256 \n", - "28 ... 4.548270 3.893239 -1.249116 \n", - "29 ... 5.486279 1.720742 -1.528911 \n", - "30 ... 5.742205 2.127085 -1.815762 \n", - "31 ... 5.653243 3.660844 -1.873861 \n", - "32 ... 4.431048 3.345810 -0.876513 \n", - "33 ... 4.958574 4.032540 -1.300899 \n", - "34 ... 4.548270 3.893239 -1.249116 \n", - "35 ... 4.702929 2.862461 -1.072953 \n", - "36 ... 4.889775 3.750096 -1.208638 \n", - "37 ... 5.063847 3.177456 -1.110125 \n", - "38 ... 4.459333 3.134352 -0.800021 \n", - "39 ... 4.889775 3.750096 -1.208638 \n", - "40 ... 4.374480 3.763926 -0.973256 \n", - "41 ... 5.880736 2.716941 -1.778920 \n", - "42 ... 4.958574 4.032540 -1.300899 \n", - "43 ... 5.907252 2.124370 -1.953074 \n", - "44 ... 4.431048 3.345810 -0.876513 \n", - "45 ... 5.571659 1.496619 -1.746299 \n", - "46 ... 5.113959 1.507419 -1.419838 \n", - "47 ... 5.653243 3.660844 -1.873861 \n", - "48 ... 4.958574 4.032540 -1.300899 \n", - "49 ... 
4.770040 3.986715 -1.209414 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.110913 3.316625 0.0 2 \\\n", - "1 0.085204 2.000000 0.0 3 \n", - "2 7.478441 3.872983 40033.0 10 \n", - "3 -0.155041 4.242641 1.0 3 \n", - "4 0.460985 3.872983 0.0 1 \n", - "5 0.264802 3.162278 0.0 1 \n", - "6 0.258452 4.000000 0.0 2 \n", - "7 7.097966 3.741657 40033.0 9 \n", - "8 2.934249 5.000000 31751.0 13 \n", - "9 0.478735 4.242641 7129.0 6 \n", - "10 -0.266625 1.732051 0.0 2 \n", - "11 1.913264 3.000000 0.0 1 \n", - "12 0.264802 3.162278 0.0 1 \n", - "13 0.460985 3.872983 0.0 1 \n", - "14 2.740759 5.099020 31751.0 14 \n", - "15 0.719276 4.123106 7129.0 5 \n", - "16 3.787216 4.795832 0.0 2 \n", - "17 -0.178785 2.000000 0.0 3 \n", - "18 0.306888 3.316625 0.0 2 \n", - "19 4.305501 4.898979 19030.0 6 \n", - "20 4.305501 4.898979 19030.0 6 \n", - "21 0.306888 3.316625 0.0 2 \n", - "22 0.460985 3.872983 0.0 1 \n", - "23 1.913264 3.000000 0.0 1 \n", - "24 2.592168 5.000000 31751.0 13 \n", - "25 0.085204 2.000000 0.0 3 \n", - "26 0.871777 4.123106 7126.0 6 \n", - "27 0.085204 2.000000 0.0 3 \n", - "28 0.460985 3.872983 0.0 1 \n", - "29 4.291841 5.000000 0.0 0 \n", - "30 4.181356 4.795832 0.0 0 \n", - "31 2.934249 5.000000 31751.0 13 \n", - "32 -0.026898 2.000000 0.0 3 \n", - "33 0.719276 4.123106 7129.0 5 \n", - "34 0.460985 3.872983 0.0 1 \n", - "35 0.192857 4.123106 0.0 2 \n", - "36 0.306888 3.316625 0.0 2 \n", - "37 0.297395 4.123106 7128.0 5 \n", - "38 -0.178785 2.000000 0.0 3 \n", - "39 0.306888 3.316625 0.0 2 \n", - "40 0.085204 2.000000 0.0 3 \n", - "41 3.642140 4.795832 19030.0 5 \n", - "42 0.719276 4.123106 7129.0 5 \n", - "43 4.892884 5.000000 0.0 2 \n", - "44 -0.026898 2.000000 0.0 3 \n", - "45 6.118929 2.828427 0.0 0 \n", - "46 4.181523 3.162278 13069.0 3 \n", - "47 2.934249 5.000000 31751.0 13 \n", - "48 0.719276 4.123106 7129.0 5 \n", - "49 0.264802 3.162278 0.0 1 \n", - "\n", - " haplo rep label \n", - "0 0 80 0 \n", - "1 1 80 0 \n", - "2 0 80 0 \n", - "3 1 80 0 \n", - "4 0 80 0 \n", - "5 1 80 0 \n", - "6 0 80 0 \n", - "7 1 80 0 \n", - "8 0 80 0 \n", - "9 1 80 0 \n", - "10 0 80 0 \n", - "11 1 80 0 \n", - "12 0 80 0 \n", - "13 1 80 0 \n", - "14 0 80 0 \n", - "15 1 80 0 \n", - "16 0 80 0 \n", - "17 1 80 0 \n", - "18 0 80 0 \n", - "19 1 80 0 \n", - "20 0 80 0 \n", - "21 1 80 0 \n", - "22 0 80 0 \n", - "23 1 80 0 \n", - "24 0 80 0 \n", - "25 1 80 0 \n", - "26 0 80 0 \n", - "27 1 80 0 \n", - "28 0 80 0 \n", - "29 1 80 0 \n", - "30 0 80 0 \n", - "31 1 80 0 \n", - "32 0 80 0 \n", - "33 1 80 0 \n", - "34 0 80 0 \n", - "35 1 80 0 \n", - "36 0 80 0 \n", - "37 1 80 0 \n", - "38 0 80 0 \n", - "39 1 80 0 \n", - "40 0 80 0 \n", - "41 1 80 0 \n", - "42 0 80 0 \n", - "43 1 80 0 \n", - "44 0 80 0 \n", - "45 1 80 0 \n", - "46 0 80 0 \n", - "47 1 80 0 \n", - "48 0 80 0 \n", - "49 1 80 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_28 50000 1.0 [1, 0, 0] 84\n", - "1 hap_0 tsk_29 50000 1.0 [1, 0, 0] 84\n", - "2 hap_0 tsk_31 50000 1.0 [1, 0, 0] 84\n", - "3 hap_0 tsk_32 50000 1.0 [1, 0, 0] 84\n", - "4 hap_0 tsk_35 50000 1.0 [1, 0, 0] 84\n", - "5 hap_0 tsk_38 50000 1.0 [1, 0, 0] 84\n", - "6 hap_0 tsk_41 50000 1.0 [1, 0, 0] 84\n", - 
"7 hap_0 tsk_49 50000 1.0 [1, 0, 0] 84\n", - "8 hap_1 tsk_27 50000 1.0 [1, 0, 0] 84\n", - "9 hap_1 tsk_32 50000 1.0 [1, 0, 0] 84\n", - "10 hap_1 tsk_36 50000 1.0 [1, 0, 0] 84\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 30 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 31 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 30 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 36 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 36 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 27 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 28 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 31 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 29 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 37 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 37 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 33 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 29 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 33 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 28 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 29 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 34 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 34 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 39 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 32 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 28 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 30 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 38 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 30 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 30 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 29 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 28 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 31 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 40 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 30 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 30 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 34 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 28 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 30 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 31 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 29 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 36 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 34 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 27 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 31 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 29 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 27 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 28 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 31 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 30 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 27 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 30 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 35 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 25 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 30 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.830989 4.281549 -0.722204 \\\n", - "1 ... 5.369203 2.831658 -1.625868 \n", - "2 ... 4.859273 4.047467 -0.648980 \n", - "3 ... 5.859262 2.689054 -1.214593 \n", - "4 ... 6.108469 3.826609 -1.258101 \n", - "5 ... 5.363792 4.609740 -1.133966 \n", - "6 ... 5.365842 5.387741 -1.285388 \n", - "7 ... 5.033368 3.765204 -0.804127 \n", - "8 ... 4.783392 3.899162 -0.560244 \n", - "9 ... 5.789655 2.379893 -1.183935 \n", - "10 ... 6.239437 3.209426 -1.229116 \n", - "11 ... 5.430180 1.813150 -1.839491 \n", - "12 ... 5.546372 4.457755 -1.203608 \n", - "13 ... 5.524459 2.500347 -1.412886 \n", - "14 ... 5.365842 5.387741 -1.285388 \n", - "15 ... 5.546372 4.457755 -1.203608 \n", - "16 ... 5.476663 2.706158 -2.116093 \n", - "17 ... 6.193796 2.576887 -1.387295 \n", - "18 ... 6.257504 2.143649 -1.742900 \n", - "19 ... 5.038039 3.998165 -0.705432 \n", - "20 ... 5.365842 5.387741 -1.285388 \n", - "21 ... 4.781001 4.602028 -0.738849 \n", - "22 ... 6.274737 3.687681 -1.319187 \n", - "23 ... 5.861773 2.499619 -1.401578 \n", - "24 ... 4.852405 3.994169 -0.594262 \n", - "25 ... 4.726919 4.396232 -0.701985 \n", - "26 ... 
5.365842 5.387741 -1.285388 \n", - "27 ... 5.033368 3.765204 -0.804127 \n", - "28 ... 6.061166 2.362261 -1.533269 \n", - "29 ... 5.308499 2.639840 -1.471570 \n", - "30 ... 5.553880 3.014417 -1.895033 \n", - "31 ... 5.476663 2.706158 -2.116093 \n", - "32 ... 5.365842 5.387741 -1.285388 \n", - "33 ... 4.830989 4.281549 -0.722204 \n", - "34 ... 5.061652 3.519674 -0.692299 \n", - "35 ... 5.818604 2.403842 -1.438803 \n", - "36 ... 6.120185 3.723342 -1.234109 \n", - "37 ... 5.476663 2.706158 -2.116093 \n", - "38 ... 5.118573 2.420206 -1.315411 \n", - "39 ... 6.261630 1.171994 -3.922913 \n", - "40 ... 4.726919 4.396232 -0.701985 \n", - "41 ... 5.548163 2.037890 -0.995660 \n", - "42 ... 5.725367 1.120172 -3.021249 \n", - "43 ... 5.369203 2.831658 -1.625868 \n", - "44 ... 4.781001 4.602028 -0.738849 \n", - "45 ... 5.118573 2.420206 -1.315411 \n", - "46 ... 5.553880 3.014417 -1.895033 \n", - "47 ... 5.550064 1.736790 -2.092595 \n", - "48 ... 5.034382 2.434996 -0.569839 \n", - "49 ... 5.553880 3.014417 -1.895033 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.541523 5.385165 2067.0 7 \\\n", - "1 2.664615 1.732051 0.0 1 \n", - "2 -0.712210 5.385165 2067.0 7 \n", - "3 1.229744 3.000000 0.0 2 \n", - "4 0.839042 4.123106 0.0 2 \n", - "5 0.208806 4.795832 16651.0 8 \n", - "6 0.565301 4.898979 16651.0 9 \n", - "7 -0.089032 5.656854 2067.0 9 \n", - "8 -0.837060 5.291503 2067.0 6 \n", - "9 1.816737 3.464102 0.0 3 \n", - "10 1.238431 4.242641 8686.0 5 \n", - "11 4.492011 4.690416 8688.0 9 \n", - "12 0.520699 5.000000 29849.0 10 \n", - "13 2.047049 1.000000 0.0 1 \n", - "14 0.565301 4.898979 16651.0 9 \n", - "15 0.520699 5.000000 29849.0 10 \n", - "16 4.661594 4.472136 8688.0 7 \n", - "17 2.361758 3.605551 0.0 4 \n", - "18 4.358591 3.741657 0.0 3 \n", - "19 -0.576066 5.567764 2067.0 9 \n", - "20 0.565301 4.898979 16651.0 9 \n", - "21 -0.613170 5.385165 2067.0 7 \n", - "22 1.153079 4.358899 0.0 4 \n", - "23 3.153790 2.000000 0.0 1 \n", - "24 -0.802904 5.385165 2067.0 7 \n", - "25 -0.626576 5.291503 2067.0 6 \n", - "26 0.565301 4.898979 16651.0 9 \n", - "27 -0.089032 5.656854 2067.0 9 \n", - "28 3.110877 2.449490 0.0 4 \n", - "29 2.064718 1.414214 0.0 0 \n", - "30 3.516515 3.605551 219.0 3 \n", - "31 4.661594 4.472136 8688.0 7 \n", - "32 0.565301 4.898979 16651.0 9 \n", - "33 -0.541523 5.385165 2067.0 7 \n", - "34 -0.387601 5.656854 2067.0 9 \n", - "35 3.423592 1.732051 0.0 0 \n", - "36 0.795203 4.123106 0.0 2 \n", - "37 4.661594 4.472136 8688.0 7 \n", - "38 2.302691 4.690416 1679.0 4 \n", - "39 20.055157 4.123106 0.0 4 \n", - "40 -0.626576 5.291503 2067.0 6 \n", - "41 2.417941 3.872983 0.0 1 \n", - "42 14.643995 2.828427 0.0 2 \n", - "43 2.664615 1.732051 0.0 1 \n", - "44 -0.613170 5.385165 2067.0 7 \n", - "45 2.302691 4.690416 1679.0 4 \n", - "46 3.516515 3.605551 219.0 3 \n", - "47 5.726208 4.690416 8688.0 9 \n", - "48 0.231925 5.385165 2067.0 7 \n", - "49 3.516515 3.605551 219.0 3 \n", - "\n", - " haplo rep label \n", - "0 0 84 0 \n", - "1 1 84 0 \n", - "2 0 84 0 \n", - "3 1 84 0 \n", - "4 0 84 0 \n", - "5 1 84 0 \n", - "6 0 84 0 \n", - "7 1 84 0 \n", - "8 0 84 0 \n", - "9 1 84 0 \n", - "10 0 84 0 \n", - "11 1 84 0 \n", - "12 0 84 0 \n", - "13 1 84 0 \n", - "14 0 84 0 \n", - "15 1 84 0 \n", - "16 0 84 0 \n", - "17 1 84 0 \n", - "18 0 84 0 \n", - "19 1 84 0 \n", - "20 0 84 0 \n", - "21 1 84 0 \n", - "22 0 84 0 \n", - "23 1 84 0 \n", - "24 0 84 0 \n", - "25 1 84 0 \n", - "26 0 84 0 \n", - "27 1 84 0 \n", - "28 0 84 0 \n", - "29 1 84 0 \n", - "30 0 84 0 \n", - "31 
1 84 0 \n", - "32 0 84 0 \n", - "33 1 84 0 \n", - "34 0 84 0 \n", - "35 1 84 0 \n", - "36 0 84 0 \n", - "37 1 84 0 \n", - "38 0 84 0 \n", - "39 1 84 0 \n", - "40 0 84 0 \n", - "41 1 84 0 \n", - "42 0 84 0 \n", - "43 1 84 0 \n", - "44 0 84 0 \n", - "45 1 84 0 \n", - "46 0 84 0 \n", - "47 1 84 0 \n", - "48 0 84 0 \n", - "49 1 84 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_26 12770 0.25540 [0, 1, 0] 90\n", - "1 hap_0 tsk_35 12770 0.25540 [0, 1, 0] 90\n", - "2 hap_0 tsk_39 27321 0.54642 [0, 0, 1] 90\n", - "3 hap_0 tsk_41 8071 0.16142 [0, 1, 0] 90\n", - "4 hap_0 tsk_44 27321 0.54642 [0, 0, 1] 90\n", - "5 hap_0 tsk_45 8071 0.16142 [0, 1, 0] 90\n", - "6 hap_1 tsk_29 27321 0.54642 [0, 0, 1] 90\n", - "7 hap_1 tsk_30 27321 0.54642 [0, 0, 1] 90\n", - "8 hap_1 tsk_32 27321 0.54642 [0, 0, 1] 90\n", - "9 hap_1 tsk_37 8071 0.16142 [0, 1, 0] 90\n", - "10 hap_1 tsk_43 12770 0.25540 [0, 1, 0] 90\n", - "11 hap_1 tsk_45 8071 0.16142 [0, 1, 0] 90\n", - "12 hap_1 tsk_46 27321 0.54642 [0, 0, 1] 90\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 24 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 19 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 21 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 26 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 22 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 23 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 16 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 22 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 21 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 21 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 23 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 24 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 21 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 22 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 19 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 22 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 21 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 23 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 20 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 22 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 22 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 22 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 24 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 22 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 20 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 21 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 21 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 23 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 24 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 22 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 20 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 22 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 20 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 20 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 23 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 23 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 19 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 21 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 24 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 17 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 21 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 21 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 26 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 25 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 21 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 22 0 0 0 0 
\n", - "46 1 0 50000 tsk_48 0 20 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 16 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 16 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 19 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 4.574760 1.071570 -2.544530 \\\n", - "1 ... 4.524783 0.606337 -3.865441 \n", - "2 ... 3.829285 1.416575 -1.149385 \n", - "3 ... 5.236052 0.703765 -4.850540 \n", - "4 ... 3.815094 1.405057 -1.084336 \n", - "5 ... 4.842788 1.267403 -3.382998 \n", - "6 ... 4.026145 1.590153 -2.010737 \n", - "7 ... 3.942865 1.453817 -1.353449 \n", - "8 ... 4.450927 1.149249 -2.702976 \n", - "9 ... 4.111022 0.659494 -2.293741 \n", - "10 ... 3.709506 1.439562 -0.885105 \n", - "11 ... 5.055400 2.122926 -2.890970 \n", - "12 ... 4.401974 0.822627 -2.431749 \n", - "13 ... 4.114828 1.068187 -1.414791 \n", - "14 ... 4.122352 0.806215 -1.662254 \n", - "15 ... 3.789036 0.723203 -1.505068 \n", - "16 ... 3.843449 1.307901 -1.010226 \n", - "17 ... 3.667868 1.626743 -1.024349 \n", - "18 ... 4.328256 1.266196 -2.732996 \n", - "19 ... 4.205833 0.710965 -2.190315 \n", - "20 ... 3.967571 1.298383 -1.082235 \n", - "21 ... 3.675892 1.087820 -0.778482 \n", - "22 ... 3.931634 1.062253 -1.086464 \n", - "23 ... 3.797933 1.295706 -0.982242 \n", - "24 ... 4.498874 0.760136 -2.344887 \n", - "25 ... 3.872734 1.441928 -0.844503 \n", - "26 ... 3.999519 0.963847 -0.710141 \n", - "27 ... 3.667868 1.626743 -1.024349 \n", - "28 ... 5.055400 2.122926 -2.890970 \n", - "29 ... 3.942865 1.453817 -1.353449 \n", - "30 ... 3.558978 1.093677 -0.704929 \n", - "31 ... 3.815094 1.405057 -1.084336 \n", - "32 ... 3.737693 1.509648 -0.781648 \n", - "33 ... 4.328256 1.266196 -2.732996 \n", - "34 ... 4.470227 1.057073 -2.421575 \n", - "35 ... 4.842788 1.267403 -3.382998 \n", - "36 ... 4.110241 0.825920 -1.597134 \n", - "37 ... 3.829285 1.416575 -1.149385 \n", - "38 ... 5.055400 2.122926 -2.890970 \n", - "39 ... 4.172304 1.351876 -1.899868 \n", - "40 ... 3.825748 1.643656 -1.009135 \n", - "41 ... 3.825748 1.643656 -1.009135 \n", - "42 ... 5.015741 1.002341 -3.296584 \n", - "43 ... 5.179079 1.817138 -2.873725 \n", - "44 ... 3.461823 1.455783 -0.708066 \n", - "45 ... 3.597422 1.378558 -0.774890 \n", - "46 ... 4.463537 0.916838 -1.555138 \n", - "47 ... 4.026145 1.590153 -2.010737 \n", - "48 ... 4.026145 1.590153 -2.010737 \n", - "49 ... 
4.048052 0.813271 -1.318367 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 7.810020 3.000000 0.0 3 \\\n", - "1 20.111866 4.123106 0.0 4 \n", - "2 2.599061 3.316625 3890.0 4 \n", - "3 27.449186 3.605551 0.0 10 \n", - "4 2.070395 2.449490 31296.0 3 \n", - "5 11.328588 3.741657 6142.0 4 \n", - "6 4.164739 4.123106 1.0 2 \n", - "7 3.058982 4.000000 83.0 5 \n", - "8 7.401618 3.872983 0.0 4 \n", - "9 11.023057 3.316625 12677.0 5 \n", - "10 0.513020 1.732051 0.0 2 \n", - "11 6.924424 4.472136 14457.0 9 \n", - "12 8.973036 4.123106 83.0 5 \n", - "13 4.067382 4.242641 83.0 6 \n", - "14 7.537369 3.316625 0.0 4 \n", - "15 5.822569 2.828427 83.0 3 \n", - "16 2.128659 3.872983 83.0 4 \n", - "17 0.839959 1.732051 0.0 2 \n", - "18 7.439181 3.741657 0.0 3 \n", - "19 10.314453 2.449490 3425.0 4 \n", - "20 2.492918 3.464102 3890.0 5 \n", - "21 1.677389 2.449490 0.0 2 \n", - "22 2.888299 2.828427 0.0 4 \n", - "23 1.056416 2.000000 0.0 2 \n", - "24 12.166149 3.316625 0.0 2 \n", - "25 1.286529 3.000000 5832.0 4 \n", - "26 3.766841 3.605551 0.0 4 \n", - "27 0.839959 1.732051 0.0 2 \n", - "28 6.924424 4.472136 14457.0 9 \n", - "29 3.058982 4.000000 83.0 5 \n", - "30 1.050565 2.000000 0.0 1 \n", - "31 2.070395 2.449490 31296.0 3 \n", - "32 0.968278 2.828427 5832.0 3 \n", - "33 7.439181 3.741657 0.0 3 \n", - "34 7.131872 2.828427 0.0 2 \n", - "35 11.328588 3.741657 6142.0 4 \n", - "36 6.998788 3.316625 3425.0 4 \n", - "37 2.599061 3.316625 3890.0 4 \n", - "38 6.924424 4.472136 14457.0 9 \n", - "39 3.956753 4.242641 1.0 3 \n", - "40 1.551578 3.000000 5832.0 4 \n", - "41 1.551578 3.000000 5832.0 4 \n", - "42 12.053942 4.123106 6141.0 5 \n", - "43 6.932371 4.582576 14457.0 10 \n", - "44 0.001941 1.000000 0.0 0 \n", - "45 0.390780 1.414214 0.0 0 \n", - "46 7.331850 4.000000 0.0 7 \n", - "47 4.164739 4.123106 1.0 2 \n", - "48 4.164739 4.123106 1.0 2 \n", - "49 6.389800 3.464102 0.0 2 \n", - "\n", - " haplo rep label \n", - "0 0 90 0 \n", - "1 1 90 0 \n", - "2 0 90 0 \n", - "3 1 90 0 \n", - "4 0 90 0 \n", - "5 1 90 0 \n", - "6 0 90 0 \n", - "7 1 90 0 \n", - "8 0 90 0 \n", - "9 1 90 0 \n", - "10 0 90 0 \n", - "11 1 90 0 \n", - "12 0 90 0 \n", - "13 1 90 0 \n", - "14 0 90 0 \n", - "15 1 90 0 \n", - "16 0 90 0 \n", - "17 1 90 0 \n", - "18 0 90 0 \n", - "19 1 90 0 \n", - "20 0 90 0 \n", - "21 1 90 0 \n", - "22 0 90 0 \n", - "23 1 90 0 \n", - "24 0 90 0 \n", - "25 1 90 0 \n", - "26 0 90 0 \n", - "27 1 90 0 \n", - "28 0 90 0 \n", - "29 1 90 0 \n", - "30 0 90 0 \n", - "31 1 90 0 \n", - "32 0 90 0 \n", - "33 1 90 0 \n", - "34 0 90 0 \n", - "35 1 90 0 \n", - "36 0 90 0 \n", - "37 1 90 0 \n", - "38 0 90 0 \n", - "39 1 90 0 \n", - "40 0 90 0 \n", - "41 1 90 0 \n", - "42 0 90 0 \n", - "43 1 90 0 \n", - "44 0 90 0 \n", - "45 1 90 0 \n", - "46 0 90 0 \n", - "47 1 90 0 \n", - "48 0 90 0 \n", - "49 1 90 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "einfach nur so da\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_48 28382 0.56764 [0, 0, 1] 91\n", - "1 hap_1 tsk_34 28382 0.56764 [0, 0, 1] 91\n", - "2 hap_1 tsk_40 28382 0.56764 [0, 0, 1] 91\n", - "3 hap_1 tsk_49 28382 0.56764 [0, 0, 1] 
91\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 29 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 32 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 30 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 29 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 25 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 28 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 31 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 29 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 30 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 30 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 27 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 35 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 30 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 35 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 26 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 30 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 22 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 34 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 26 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 22 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 23 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 29 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 30 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 26 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 26 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 30 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 25 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 33 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 35 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 25 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 24 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 22 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 27 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 28 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 28 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 27 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 28 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 27 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 27 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 25 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 34 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 28 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 23 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 22 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 22 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 31 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 22 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 25 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 25 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 28 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 5.348291 1.835779 -1.746929 \\\n", - "1 ... 5.481574 1.192346 -2.117202 \n", - "2 ... 5.332053 2.929209 -1.876438 \n", - "3 ... 4.847268 2.503991 -0.423265 \n", - "4 ... 4.488032 3.297566 -0.130058 \n", - "5 ... 5.082436 1.408847 -1.099461 \n", - "6 ... 4.979384 3.325732 -0.299975 \n", - "7 ... 5.385929 2.031773 -1.456982 \n", - "8 ... 4.864765 2.174057 -1.074482 \n", - "9 ... 5.308825 1.296375 -1.953130 \n", - "10 ... 5.181195 2.195221 -1.000441 \n", - "11 ... 6.340837 2.753785 -3.282000 \n", - "12 ... 5.332053 2.929209 -1.876438 \n", - "13 ... 6.340837 2.753785 -3.282000 \n", - "14 ... 5.127729 1.386396 -1.052339 \n", - "15 ... 5.332053 2.929209 -1.876438 \n", - "16 ... 4.312966 3.918328 -0.506408 \n", - "17 ... 6.673279 3.107347 -2.754436 \n", - "18 ... 5.278206 1.580546 -1.454508 \n", - "19 ... 5.137487 2.046232 -1.930465 \n", - "20 ... 4.439672 3.729316 -0.509632 \n", - "21 ... 5.320450 2.532813 -1.294397 \n", - "22 ... 4.864765 2.174057 -1.074482 \n", - "23 ... 5.978131 1.141954 -3.571862 \n", - "24 ... 5.082380 2.249413 -0.968849 \n", - "25 ... 4.873120 3.412706 -0.258819 \n", - "26 ... 5.715952 1.407891 -2.162814 \n", - "27 ... 6.628446 2.823706 -2.645420 \n", - "28 ... 6.340837 2.753785 -3.282000 \n", - "29 ... 4.393777 3.894728 -0.364210 \n", - "30 ... 
4.508938 3.629482 -0.355970 \n", - "31 ... 5.137487 2.046232 -1.930465 \n", - "32 ... 4.409419 4.637028 -0.437945 \n", - "33 ... 5.141317 1.766863 -0.941184 \n", - "34 ... 4.574487 4.114067 -0.331013 \n", - "35 ... 4.409419 4.637028 -0.437945 \n", - "36 ... 5.263312 2.577546 -1.277992 \n", - "37 ... 4.409419 4.637028 -0.437945 \n", - "38 ... 4.409419 4.637028 -0.437945 \n", - "39 ... 4.393777 3.894728 -0.364210 \n", - "40 ... 6.673279 3.107347 -2.754436 \n", - "41 ... 5.263312 2.577546 -1.277992 \n", - "42 ... 4.439672 3.729316 -0.509632 \n", - "43 ... 4.312966 3.918328 -0.506408 \n", - "44 ... 4.312966 3.918328 -0.506408 \n", - "45 ... 5.017368 2.426023 -0.306533 \n", - "46 ... 5.197073 1.470437 -1.614727 \n", - "47 ... 4.393777 3.894728 -0.364210 \n", - "48 ... 4.983545 2.284277 -0.907135 \n", - "49 ... 5.360440 2.185678 -1.315472 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 3.886796 4.472136 0.0 2 \\\n", - "1 10.797832 4.898979 0.0 3 \n", - "2 3.056947 4.472136 0.0 2 \n", - "3 -0.076164 3.872983 2538.0 3 \n", - "4 -0.949461 3.316625 0.0 2 \n", - "5 5.008627 4.690416 0.0 4 \n", - "6 -0.626993 4.123106 28452.0 7 \n", - "7 2.416272 4.358899 0.0 3 \n", - "8 2.441633 3.741657 0.0 1 \n", - "9 8.212099 4.690416 13970.0 4 \n", - "10 2.232112 3.316625 13970.0 6 \n", - "11 9.831353 4.242641 6924.0 2 \n", - "12 3.056947 4.472136 0.0 2 \n", - "13 9.831353 4.242641 6924.0 2 \n", - "14 5.907519 3.741657 0.0 1 \n", - "15 3.056947 4.472136 0.0 2 \n", - "16 -0.547414 3.162278 0.0 0 \n", - "17 7.437953 2.645751 26309.0 3 \n", - "18 4.490443 4.358899 0.0 1 \n", - "19 4.270450 3.741657 12521.0 2 \n", - "20 -0.501813 3.316625 0.0 1 \n", - "21 1.259080 4.582576 0.0 1 \n", - "22 2.441633 3.741657 0.0 1 \n", - "23 16.964514 4.795832 0.0 4 \n", - "24 2.060934 3.162278 13970.0 5 \n", - "25 -0.769558 4.000000 28452.0 6 \n", - "26 8.416100 2.000000 0.0 1 \n", - "27 6.967290 2.449490 26309.0 2 \n", - "28 9.831353 4.242641 6924.0 2 \n", - "29 -0.625812 3.316625 2538.0 2 \n", - "30 -0.895146 3.464102 0.0 2 \n", - "31 4.270450 3.741657 12521.0 2 \n", - "32 -0.749297 3.605551 1.0 3 \n", - "33 3.151936 4.000000 13970.0 4 \n", - "34 -0.907103 3.741657 1.0 4 \n", - "35 -0.749297 3.605551 1.0 3 \n", - "36 1.227943 4.472136 0.0 1 \n", - "37 -0.749297 3.605551 1.0 3 \n", - "38 -0.749297 3.605551 1.0 3 \n", - "39 -0.625812 3.316625 2538.0 2 \n", - "40 7.437953 2.645751 26309.0 3 \n", - "41 1.227943 4.472136 0.0 1 \n", - "42 -0.501813 3.316625 0.0 1 \n", - "43 -0.547414 3.162278 0.0 0 \n", - "44 -0.547414 3.162278 0.0 0 \n", - "45 0.364378 4.358899 0.0 5 \n", - "46 5.621636 3.872983 12521.0 3 \n", - "47 -0.625812 3.316625 2538.0 2 \n", - "48 1.803998 3.000000 13970.0 4 \n", - "49 1.851698 4.472136 0.0 1 \n", - "\n", - " haplo rep label \n", - "0 0 91 0 \n", - "1 1 91 0 \n", - "2 0 91 0 \n", - "3 1 91 0 \n", - "4 0 91 0 \n", - "5 1 91 0 \n", - "6 0 91 0 \n", - "7 1 91 0 \n", - "8 0 91 0 \n", - "9 1 91 0 \n", - "10 0 91 0 \n", - "11 1 91 0 \n", - "12 0 91 0 \n", - "13 1 91 0 \n", - "14 0 91 0 \n", - "15 1 91 0 \n", - "16 0 91 0 \n", - "17 1 91 0 \n", - "18 0 91 0 \n", - "19 1 91 0 \n", - "20 0 91 0 \n", - "21 1 91 0 \n", - "22 0 91 0 \n", - "23 1 91 0 \n", - "24 0 91 0 \n", - "25 1 91 0 \n", - "26 0 91 0 \n", - "27 1 91 0 \n", - "28 0 91 0 \n", - "29 1 91 0 \n", - "30 0 91 0 \n", - "31 1 91 0 \n", - "32 0 91 0 \n", - "33 1 91 0 \n", - "34 0 91 0 \n", - "35 1 91 0 \n", - "36 0 91 0 \n", - "37 1 91 0 \n", - "38 0 91 0 \n", - "39 1 91 0 \n", - "40 0 91 0 \n", - "41 1 91 0 \n", - "42 0 
91 0 \n", - "43 1 91 0 \n", - "44 0 91 0 \n", - "45 1 91 0 \n", - "46 0 91 0 \n", - "47 1 91 0 \n", - "48 0 91 0 \n", - "49 1 91 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "ambig\n", - "einfach nur so da\n", - "show true tractl\n", - " hap ind len prop label rep\n", - "0 hap_0 tsk_39 50000 1.0 [1, 0, 0] 93\n", - "1 hap_1 tsk_31 50000 1.0 [1, 0, 0] 93\n", - "2 hap_1 tsk_41 50000 1.0 [1, 0, 0] 93\n", - "und der feature_df\n", - " chrom start end sample 0-ton 1-ton 2-ton 3-ton 4-ton 5-ton \n", - "0 1 0 50000 tsk_25 0 37 0 0 0 0 \\\n", - "1 1 0 50000 tsk_25 0 45 0 0 0 0 \n", - "2 1 0 50000 tsk_26 0 30 0 0 0 0 \n", - "3 1 0 50000 tsk_26 0 42 0 0 0 0 \n", - "4 1 0 50000 tsk_27 0 43 0 0 0 0 \n", - "5 1 0 50000 tsk_27 0 38 0 0 0 0 \n", - "6 1 0 50000 tsk_28 0 43 0 0 0 0 \n", - "7 1 0 50000 tsk_28 0 44 0 0 0 0 \n", - "8 1 0 50000 tsk_29 0 40 0 0 0 0 \n", - "9 1 0 50000 tsk_29 0 42 0 0 0 0 \n", - "10 1 0 50000 tsk_30 0 39 0 0 0 0 \n", - "11 1 0 50000 tsk_30 0 39 0 0 0 0 \n", - "12 1 0 50000 tsk_31 0 41 0 0 0 0 \n", - "13 1 0 50000 tsk_31 0 43 0 0 0 0 \n", - "14 1 0 50000 tsk_32 0 37 0 0 0 0 \n", - "15 1 0 50000 tsk_32 0 37 0 0 0 0 \n", - "16 1 0 50000 tsk_33 0 44 0 0 0 0 \n", - "17 1 0 50000 tsk_33 0 36 0 0 0 0 \n", - "18 1 0 50000 tsk_34 0 40 0 0 0 0 \n", - "19 1 0 50000 tsk_34 0 39 0 0 0 0 \n", - "20 1 0 50000 tsk_35 0 30 0 0 0 0 \n", - "21 1 0 50000 tsk_35 0 44 0 0 0 0 \n", - "22 1 0 50000 tsk_36 0 39 0 0 0 0 \n", - "23 1 0 50000 tsk_36 0 41 0 0 0 0 \n", - "24 1 0 50000 tsk_37 0 37 0 0 0 0 \n", - "25 1 0 50000 tsk_37 0 44 0 0 0 0 \n", - "26 1 0 50000 tsk_38 0 44 0 0 0 0 \n", - "27 1 0 50000 tsk_38 0 38 0 0 0 0 \n", - "28 1 0 50000 tsk_39 0 40 0 0 0 0 \n", - "29 1 0 50000 tsk_39 0 30 0 0 0 0 \n", - "30 1 0 50000 tsk_40 0 45 0 0 0 0 \n", - "31 1 0 50000 tsk_40 0 44 0 0 0 0 \n", - "32 1 0 50000 tsk_41 0 39 0 0 0 0 \n", - "33 1 0 50000 tsk_41 0 40 0 0 0 0 \n", - "34 1 0 50000 tsk_42 0 37 0 0 0 0 \n", - "35 1 0 50000 tsk_42 0 39 0 0 0 0 \n", - "36 1 0 50000 tsk_43 0 40 0 0 0 0 \n", - "37 1 0 50000 tsk_43 0 41 0 0 0 0 \n", - "38 1 0 50000 tsk_44 0 38 0 0 0 0 \n", - "39 1 0 50000 tsk_44 0 36 0 0 0 0 \n", - "40 1 0 50000 tsk_45 0 45 0 0 0 0 \n", - "41 1 0 50000 tsk_45 0 36 0 0 0 0 \n", - "42 1 0 50000 tsk_46 0 44 0 0 0 0 \n", - "43 1 0 50000 tsk_46 0 45 0 0 0 0 \n", - "44 1 0 50000 tsk_47 0 40 0 0 0 0 \n", - "45 1 0 50000 tsk_47 0 40 0 0 0 0 \n", - "46 1 0 50000 tsk_48 0 39 0 0 0 0 \n", - "47 1 0 50000 tsk_48 0 39 0 0 0 0 \n", - "48 1 0 50000 tsk_49 0 42 0 0 0 0 \n", - "49 1 0 50000 tsk_49 0 44 0 0 0 0 \n", - "\n", - " ... mean_pairwised_dist var_pairwised_dist skew_pairwised_dist \n", - "0 ... 3.611909 2.894116 -0.145342 \\\n", - "1 ... 4.062100 2.919340 -0.592700 \n", - "2 ... 5.374230 2.297650 -2.425019 \n", - "3 ... 3.740937 2.905390 -0.437237 \n", - "4 ... 3.911573 2.559595 -0.334528 \n", - "5 ... 3.875022 2.364201 -0.117239 \n", - "6 ... 4.062878 1.753021 0.231000 \n", - "7 ... 5.445777 2.603515 -2.590833 \n", - "8 ... 3.830136 2.270058 -0.343108 \n", - "9 ... 3.740937 2.905390 -0.437237 \n", - "10 ... 3.723999 2.311835 -0.384443 \n", - "11 ... 3.671366 1.901068 0.321393 \n", - "12 ... 4.101162 2.200470 -0.357109 \n", - "13 ... 5.844535 1.061406 -3.524927 \n", - "14 ... 3.730668 2.502119 -0.090487 \n", - "15 ... 3.736831 2.496096 -0.145440 \n", - "16 ... 3.924047 3.101856 -0.590733 \n", - "17 ... 4.348483 2.310699 -0.550793 \n", - "18 ... 3.962415 2.719264 -0.231933 \n", - "19 ... 
3.723999 2.311835 -0.384443 \n", - "20 ... 5.374230 2.297650 -2.425019 \n", - "21 ... 5.445777 2.603515 -2.590833 \n", - "22 ... 3.723999 2.311835 -0.384443 \n", - "23 ... 4.101162 2.200470 -0.357109 \n", - "24 ... 3.736831 2.496096 -0.145440 \n", - "25 ... 3.924047 3.101856 -0.590733 \n", - "26 ... 3.924047 3.101856 -0.590733 \n", - "27 ... 3.727584 2.565120 -0.023833 \n", - "28 ... 6.820088 2.066396 -4.264530 \n", - "29 ... 5.374230 2.297650 -2.425019 \n", - "30 ... 5.563738 2.264815 -2.534477 \n", - "31 ... 4.045926 2.370487 -0.328401 \n", - "32 ... 3.794486 3.061875 -0.334014 \n", - "33 ... 6.820088 2.066396 -4.264530 \n", - "34 ... 3.611909 2.894116 -0.145342 \n", - "35 ... 3.794486 3.061875 -0.334014 \n", - "36 ... 3.806575 1.849990 0.223622 \n", - "37 ... 3.976626 2.086443 -0.280860 \n", - "38 ... 3.875022 2.364201 -0.117239 \n", - "39 ... 4.348483 2.310699 -0.550793 \n", - "40 ... 4.062100 2.919340 -0.592700 \n", - "41 ... 5.757775 1.228026 -2.137913 \n", - "42 ... 4.114937 1.967294 -0.186831 \n", - "43 ... 4.234617 1.928015 -0.279484 \n", - "44 ... 3.830136 2.270058 -0.343108 \n", - "45 ... 5.854059 0.949997 -4.108232 \n", - "46 ... 3.794486 3.061875 -0.334014 \n", - "47 ... 3.780567 1.967313 -0.040774 \n", - "48 ... 3.740937 2.905390 -0.437237 \n", - "49 ... 5.445777 2.603515 -2.590833 \n", - "\n", - " kurtosis_pairwised_dist min_dist_to_ref S*_score private_SNP_num \n", - "0 -0.537496 2.000000 0.0 1 \\\n", - "1 -0.062074 2.449490 18126.0 4 \n", - "2 6.770324 1.000000 0.0 0 \n", - "3 -0.231055 1.732051 0.0 1 \n", - "4 -0.274764 2.000000 0.0 2 \n", - "5 -0.203754 2.236068 0.0 3 \n", - "6 0.919122 2.449490 0.0 4 \n", - "7 6.059619 5.196152 37221.0 6 \n", - "8 0.427385 2.645751 10654.0 4 \n", - "9 -0.231055 1.732051 0.0 1 \n", - "10 0.603318 2.449490 10654.0 3 \n", - "11 0.326100 1.414214 0.0 1 \n", - "12 0.746433 2.828427 11058.0 5 \n", - "13 18.006522 5.291503 24795.0 14 \n", - "14 -0.375365 2.000000 0.0 2 \n", - "15 -0.093169 2.000000 0.0 1 \n", - "16 -0.148604 2.236068 18126.0 3 \n", - "17 0.998137 3.316625 0.0 2 \n", - "18 -0.367727 2.645751 0.0 4 \n", - "19 0.603318 2.449490 10654.0 3 \n", - "20 6.770324 1.000000 0.0 0 \n", - "21 6.059619 5.196152 37221.0 6 \n", - "22 0.603318 2.449490 10654.0 3 \n", - "23 0.746433 2.828427 11058.0 5 \n", - "24 -0.093169 2.000000 0.0 1 \n", - "25 -0.148604 2.236068 18126.0 3 \n", - "26 -0.148604 2.236068 18126.0 3 \n", - "27 -0.379871 2.236068 0.0 1 \n", - "28 17.324596 6.403124 35966.0 15 \n", - "29 6.770324 1.000000 0.0 0 \n", - "30 5.952122 5.291503 37221.0 7 \n", - "31 -0.118107 2.236068 5854.0 3 \n", - "32 -0.315835 2.449490 0.0 3 \n", - "33 17.324596 6.403124 35966.0 15 \n", - "34 -0.537496 2.000000 0.0 1 \n", - "35 -0.315835 2.449490 0.0 3 \n", - "36 0.545309 1.732051 0.0 2 \n", - "37 0.429831 2.828427 10654.0 5 \n", - "38 -0.203754 2.236068 0.0 3 \n", - "39 0.998137 3.316625 0.0 2 \n", - "40 -0.062074 2.449490 18126.0 4 \n", - "41 12.904041 4.690416 5854.0 6 \n", - "42 0.565471 2.000000 0.0 0 \n", - "43 0.846566 2.236068 0.0 1 \n", - "44 0.427385 2.645751 10654.0 4 \n", - "45 23.330780 5.385165 4392.0 13 \n", - "46 -0.315835 2.449490 0.0 3 \n", - "47 0.279160 2.449490 10654.0 3 \n", - "48 -0.231055 1.732051 0.0 1 \n", - "49 6.059619 5.196152 37221.0 6 \n", - "\n", - " haplo rep label \n", - "0 0 93 0 \n", - "1 1 93 0 \n", - "2 0 93 0 \n", - "3 1 93 0 \n", - "4 0 93 0 \n", - "5 1 93 0 \n", - "6 0 93 0 \n", - "7 1 93 0 \n", - "8 0 93 0 \n", - "9 1 93 0 \n", - "10 0 93 0 \n", - "11 1 93 0 \n", - "12 0 93 0 \n", - "13 1 93 0 \n", - "14 0 
93 0 \n", - "15 1 93 0 \n", - "16 0 93 0 \n", - "17 1 93 0 \n", - "18 0 93 0 \n", - "19 1 93 0 \n", - "20 0 93 0 \n", - "21 1 93 0 \n", - "22 0 93 0 \n", - "23 1 93 0 \n", - "24 0 93 0 \n", - "25 1 93 0 \n", - "26 0 93 0 \n", - "27 1 93 0 \n", - "28 0 93 0 \n", - "29 1 93 0 \n", - "30 0 93 0 \n", - "31 1 93 0 \n", - "32 0 93 0 \n", - "33 1 93 0 \n", - "34 0 93 0 \n", - "35 1 93 0 \n", - "36 0 93 0 \n", - "37 1 93 0 \n", - "38 0 93 0 \n", - "39 1 93 0 \n", - "40 0 93 0 \n", - "41 1 93 0 \n", - "42 0 93 0 \n", - "43 1 93 0 \n", - "44 0 93 0 \n", - "45 1 93 0 \n", - "46 0 93 0 \n", - "47 1 93 0 \n", - "48 0 93 0 \n", - "49 1 93 0 \n", - "\n", - "[50 rows x 115 columns]\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n", - "Adding label 1\n", - "einfach nur so da\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/scipy/optimize/_linesearch.py:457: LineSearchWarning: The line search algorithm did not converge\n", - " warn('The line search algorithm did not converge', LineSearchWarning)\n", - "/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/scipy/optimize/_linesearch.py:306: LineSearchWarning: The line search algorithm did not converge\n", - " warn('The line search algorithm did not converge', LineSearchWarning)\n", - "/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/scipy/optimize/_linesearch.py:416: LineSearchWarning: Rounding errors prevent the line search from converging\n", - " warn(msg, LineSearchWarning)\n", - "/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sklearn/utils/optimize.py:203: UserWarning: Line Search failed\n", - " warnings.warn(\"Line Search failed\")\n" - ] - } - ], - "source": [ - "#train logistisc classifier with simulated data\n", - "train.train_parameters_archienew_fin(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "#import pandas as pd\n", - "\n", - "#df = pd.read_csv(os.path.join(output_dir, \"features_final.csv\"))\n", - "#train.train_statsmodels(df, \"statsmodels_training.pickle\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/scipy/optimize/_linesearch.py:457: LineSearchWarning: The line search algorithm did not converge\n", - " warn('The line search algorithm did not converge', LineSearchWarning)\n", - "/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/scipy/optimize/_linesearch.py:306: LineSearchWarning: The line search algorithm did not converge\n", - " warn('The line search algorithm did not converge', LineSearchWarning)\n" - ] - } - ], - "source": [ - "#import pandas as pd\n", - "\n", - "#df = pd.read_csv(os.path.join(output_dir, \"features_final.csv\"))\n", - "#train.train_scikit(df, \"scikit_training.pickle\")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "#test_parameters\n", - "demo_model_file=\"./examples/models/archie2.yaml\"\n", - "nrep = 10\n", - "nref = 25\n", - "ntgt= 25\n", - "ref_id = 'N1'\n", - "tgt_id = 'N2'\n", - "src_id = 'Na'\n", - "seq_len = 1000000\n", - "mut_rate = 1.25e-8\n", - 
"rec_rate = 1e-8\n", - "thread = 6\n", - "seed = None\n", - "preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "#output filenames for models\n", - "scikitfile = output_prefix + \".scikit.pickle\"\n", - "statsmodelsfile = output_prefix + \".statsmodels.pickle\"" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Process Process-608:\n", - "Process Process-609:\n", - "Process Process-607:\n", - "Traceback (most recent call last):\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 315, in _bootstrap\n", - " self.run()\n", - "Traceback (most recent call last):\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 315, in _bootstrap\n", - " self.run()\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - "Traceback (most recent call last):\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 315, in _bootstrap\n", - " self.run()\n", - "Process Process-611:\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sstar/train.py\", line 452, in _simulation_worker\n", - " ts.dump(output_dir+'/'+output_prefix+f'{rep}.ts')\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sstar/train.py\", line 452, in _simulation_worker\n", - " ts.dump(output_dir+'/'+output_prefix+f'{rep}.ts')\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/trees.py\", line 4122, in dump\n", - " file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sstar/train.py\", line 452, in _simulation_worker\n", - " ts.dump(output_dir+'/'+output_prefix+f'{rep}.ts')\n", - "Traceback (most recent call last):\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/trees.py\", line 4122, in dump\n", - " file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/util.py\", line 692, in convert_file_like_to_open_file\n", - " _file = open(path, mode)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 315, in _bootstrap\n", - " self.run()\n", - "FileNotFoundError: [Errno 2] No such file or directory: 'v30tlabeltest/ldtest1.ts'\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/trees.py\", line 4122, in dump\n", - 
" file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sstar/train.py\", line 452, in _simulation_worker\n", - " ts.dump(output_dir+'/'+output_prefix+f'{rep}.ts')\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/util.py\", line 692, in convert_file_like_to_open_file\n", - " _file = open(path, mode)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/trees.py\", line 4122, in dump\n", - " file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/util.py\", line 692, in convert_file_like_to_open_file\n", - " _file = open(path, mode)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/util.py\", line 692, in convert_file_like_to_open_file\n", - " _file = open(path, mode)\n", - "FileNotFoundError: [Errno 2] No such file or directory: 'v30tlabeltest/ldtest2.ts'\n", - "FileNotFoundError: [Errno 2] No such file or directory: 'v30tlabeltest/ldtest4.ts'\n", - "FileNotFoundError: [Errno 2] No such file or directory: 'v30tlabeltest/ldtest0.ts'\n", - "Process Process-612:\n", - "Traceback (most recent call last):\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 315, in _bootstrap\n", - " self.run()\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sstar/train.py\", line 452, in _simulation_worker\n", - " ts.dump(output_dir+'/'+output_prefix+f'{rep}.ts')\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/trees.py\", line 4122, in dump\n", - " file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/util.py\", line 692, in convert_file_like_to_open_file\n", - " _file = open(path, mode)\n", - "FileNotFoundError: [Errno 2] No such file or directory: 'v30tlabeltest/ldtest5.ts'\n", - "Process Process-610:\n", - "Traceback (most recent call last):\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 315, in _bootstrap\n", - " self.run()\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/multiprocessing/process.py\", line 108, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/sstar/train.py\", line 452, in _simulation_worker\n", - " ts.dump(output_dir+'/'+output_prefix+f'{rep}.ts')\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/trees.py\", line 4122, in dump\n", - " file, local_file = util.convert_file_like_to_open_file(file_or_path, \"wb\")\n", - " File \"/home/jalh/anaconda3/envs/sstar-analysis2/lib/python3.8/site-packages/tskit/util.py\", line 692, in convert_file_like_to_open_file\n", - " _file = open(path, mode)\n", - "FileNotFoundError: [Errno 2] No such file or directory: 'v30tlabeltest/ldtest3.ts'\n" - ] - } - ], - "source": [ - "#simulate test data and predict\n", - "infer.simulate_predict_introgression(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, 
src_id, seq_len, mut_rate, rec_rate, thread,output_prefix, output_dir, statsmodelsfile, scikitfile)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "sstar-analysis2", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.15" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sstar/notebooks/small_example.ipynb b/sstar/notebooks/small_example.ipynb deleted file mode 100644 index f03dfcb..0000000 --- a/sstar/notebooks/small_example.ipynb +++ /dev/null @@ -1,125 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sstar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sstar import preprocess\n", - "from sstar import train\n", - "from sstar import stats\n", - "from sstar import infer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#parameters for training\n", - "\n", - "demo_model_file=\"./examples/models/archie2.yaml\"\n", - "nrep = 10000\n", - "nref = 25\n", - "ntgt= 25\n", - "ref_id = 'N1'\n", - "tgt_id = 'N2'\n", - "src_id = 'Na'\n", - "seq_len = 50000\n", - "mut_rate = 1.25e-8\n", - "rec_rate = 1e-8\n", - "thread = 6\n", - "output_prefix = 'v21t'\n", - "output_dir = \"v2105new\"\n", - "seed = None\n", - "preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#train logistisc classifier with simulated data\n", - "train.train_parameters_archienew_fin(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#test_parameters\n", - "demo_model_file=\"./examples/models/archie2.yaml\"\n", - "nrep = 10\n", - "nref = 25\n", - "ntgt= 25\n", - "ref_id = 'N1'\n", - "tgt_id = 'N2'\n", - "src_id = 'Na'\n", - "seq_len = 1000000\n", - "mut_rate = 1.25e-8\n", - "rec_rate = 1e-8\n", - "thread = 6\n", - "seed = None\n", - "preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#output filenames for models\n", - "scikitfile = output_prefix + \".scikit.pickle\"\n", - "statsmodelsfile = output_prefix + \".statsmodels.pickle\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#simulate test data and predict\n", - "infer.simulate_predict_introgression(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread,output_prefix, output_dir, statsmodelsfile, scikitfile)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "sstar-analysis2", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - 
"file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.15" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/sstar/train.py b/sstar/train.py index a78b138..8505f07 100644 --- a/sstar/train.py +++ b/sstar/train.py @@ -24,13 +24,23 @@ import stats import preprocess import os +from concurrent.futures import ProcessPoolExecutor as Pool + +global output_dir +global ref_ind_file +global tgt_ind_file +global anc_allele_file +global win_len +global win_step +global thread +global match_bonus +global archaic_prop +global mismatch_penalty +global max_mismatch +global process_archie +global not_archaic_prop +global seq_len -def get_all_folders_train(output_dir): - #res_dir = os.path.join("results", "simulated_data", output_dir, ref_tgt_folder) - rep_folders = [] - for replicate, folder in enumerate(os.listdir(output_dir)): - rep_folders.append(os.path.join(output_dir, folder)) - return rep_folders def train(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed=None, train_archie=False): """ @@ -40,318 +50,12 @@ def train(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mu _simulation_manager(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed) if train_archie: - #_train_archie() + _train_archie(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir) else: _train_sstar() -def _train_archie(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=True): - - outputfolder = output_dir - - #set filenames for individuals, reference and target - ref_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" - tgt_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" - - create_ref_tgt_file(nref, ntgt, ref_ind_file, tgt_ind_file) - - anc_allele_file = None - - #set window length and stepsize - win_len = 50000 - win_step = 50000 - - #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data - match_bonus = 1 - max_mismatch = 1 - mismatch_penalty = 1 - - process_archie = True - - #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) - archaic_prop = 0.7 - not_archaic_prop = 0.3 - - true_tracts = [] - - true_tracts_labeled = [] - - features = [] - - file_names = [] - - replicate_counter = 0 - - #reading of data, preprocessing - i.e., calculating statistics -, and obtaining & labeling of true tracts - for replicate, file in enumerate(os.listdir(output_dir)): - if file.endswith(".vcf"): - - filename = os.path.splitext(file)[0] - - - feature_file = os.path.splitext(file)[0]+'.features' - #computation of statistics - preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - true_tracts.append ( pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) ) - - - #labeling of true tracts - 
true_tract_labeled = _label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - true_tracts_labeled.append(true_tract_labeled) - - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - features.append(feature) - file_names.append(filename) - replicate_counter = replicate_counter + 1 - - - feature_df_labeleds = [] - - #Labeling of features - for i, feature_df in enumerate(features): - - feature_df_labeled = label_feature_df_archie(feature_df, true_tracts_labeled[i]) - - feature_df_labeleds.append(feature_df_labeled) - - #create one big training dataframe - train_df = pd.concat(feature_df_labeleds) - - #train_df.to_csv(os.path.join(outputfolder, "features_full.csv")) - - #drop all unneccesary columns - train_df.drop(['rep', 'chrom', 'start', 'end', 'sample', 'interval', 'overlap', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3", "haplo"], axis=1, inplace=True, errors='ignore') - - #drop_dynamic_cols indicate whether non-fixed size features should be dropped - if drop_dynamic_cols == True: - dynamic_cols = [col for col in train_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - train_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - - train_df.to_csv(os.path.join(outputfolder, "features_final.csv")) - - #start training - scikit_file = output_prefix + ".scikit.pickle" - statsmodels_file = output_prefix + ".statsmodels.pickle" - - #call training functions - - train_statsmodels(train_df, statsmodels_file) - - train_scikit(train_df, scikit_file) - - - - -def _train_archie_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=True, do_training = False): - - outputfolder = output_dir + "_features_df" - - if not os.path.exists(outputfolder): - os.makedirs(outputfolder) - - #set filenames for individuals, reference and target - ref_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" - tgt_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" - - create_ref_tgt_file(nref, ntgt, ref_ind_file, tgt_ind_file) - - anc_allele_file = None - - #set window length and stepsize - win_len = 50000 - win_step = 50000 - - #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data - match_bonus = 1 - max_mismatch = 1 - mismatch_penalty = 1 - - process_archie = True - - #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) - archaic_prop = 0.7 - not_archaic_prop = 0.3 - - true_tracts = [] - - true_tracts_labeled = [] - - features = [] - - file_names = [] - - replicate_counter = 0 - - #reading of data, preprocessing - i.e., calculating statistics -, and obtaining & labeling of true tracts - - for replicate1, folder in enumerate(os.listdir(output_dir)): - if os.path.isdir(os.path.join(output_dir, folder)): - for file in (os.listdir(os.path.join(output_dir, folder))): - if file.endswith(".vcf"): - - filename = os.path.splitext(file)[0] - - - feature_file = os.path.splitext(file)[0]+'.features' - #computation of statistics - preprocess.process_data(os.path.join(output_dir,folder, file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,folder,feature_file), win_len, win_step, thread, match_bonus, 
max_mismatch, mismatch_penalty, process_archie) - - - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - true_tracts.append ( pd.read_csv(os.path.join(output_dir, folder, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) ) - - - #labeling of true tracts - true_tract_labeled = _label(os.path.join(output_dir, folder, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - true_tracts_labeled.append(true_tract_labeled) - - feature = pd.read_csv(os.path.join(output_dir, folder, feature_file), sep="\t") - feature["rep"] = replicate_counter - - features.append(feature) - file_names.append(filename) - replicate_counter = replicate_counter + 1 - - - feature_df_labeleds = [] - - #Labeling of features - for i, feature_df in enumerate(features): - - feature_df_labeled = label_feature_df_archie(feature_df, true_tracts_labeled[i]) - - feature_df_labeleds.append(feature_df_labeled) - - #create one big training dataframe - train_df = pd.concat(feature_df_labeleds) - - #train_df.to_csv(os.path.join(outputfolder, "features_full.csv")) - - #drop all unneccesary columns - train_df.drop(['rep', 'chrom', 'start', 'end', 'sample', 'interval', 'overlap', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3", "haplo"], axis=1, inplace=True, errors='ignore') - - #drop_dynamic_cols indicate whether non-fixed size features should be dropped - if drop_dynamic_cols == True: - dynamic_cols = [col for col in train_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))] - train_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore') - - - train_df.to_csv(os.path.join(outputfolder, "features_final.csv")) - - - if do_training == True: - #start training - scikit_file = output_prefix + ".scikit.pickle" - statsmodels_file = output_prefix + ".statsmodels.pickle" - - #call training functions - - train_statsmodels(train_df, statsmodels_file) - - train_scikit(train_df, scikit_file) - - -#from multiprocessing import Pool -from concurrent.futures import ProcessPoolExecutor as Pool -def parallel_process_data(output_dir, output_tuples, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie, archaic_prop, not_archaic_prop, seq_len): - file = output_tuples[1] - replicate_counter = output_tuples[0] - - - filename = os.path.splitext(file)[0] - - - feature_file = os.path.splitext(file)[0]+'.features' - #computation of statistics - preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - - - ###true_tracts.append ( true_tract_data) - - - #labeling of true tracts - true_tract_labeled = _label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - #true_tracts_labeled.append(true_tract_labeled) - - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - #features.append(feature) - #file_names.append(filename) - #replicate_counter = replicate_counter + 1 - - return 
true_tract_labeled, feature - - -def parallel_process_label_data(output_dir, output_tuples, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie, archaic_prop, not_archaic_prop, seq_len): - file = output_tuples[1] - replicate_counter = output_tuples[0] - - - #not necessary anymore - #filename = os.path.splitext(file)[0] - - - feature_file = os.path.splitext(file)[0]+'.features' - #computation of statistics - preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - - #reading not necessary - #true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - - - ###true_tracts.append ( true_tract_data) - - - #labeling of true tracts - true_tract_labeled = _label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - #true_tracts_labeled.append(true_tract_labeled) - - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - #features.append(feature) - #file_names.append(filename) - - #replicate_counter = replicate_counter + 1 - - - #for i, feature_df in enumerate(features): - - feature_df_labeled = label_feature_df_archie(feature, true_tract_labeled) - - #feature_df_labeleds.append(feature_df_labeled) - - #return true_tract_labeled, feature - return feature_df_labeled - - def parallel_process_label_data_sm(output_tuples): global output_dir @@ -369,71 +73,29 @@ def parallel_process_label_data_sm(output_tuples): global not_archaic_prop global seq_len - file = output_tuples[1] replicate_counter = output_tuples[0] - - #not necessary anymore - #filename = os.path.splitext(file)[0] - - feature_file = os.path.splitext(file)[0]+'.features' #computation of statistics preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - #reading not necessary - #true_tract_data = pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) - - - ###true_tracts.append ( true_tract_data) - - #labeling of true tracts true_tract_labeled = _label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) if true_tract_labeled is not None: true_tract_labeled["rep"] = replicate_counter - #true_tracts_labeled.append(true_tract_labeled) - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") feature["rep"] = replicate_counter - #features.append(feature) - #file_names.append(filename) - - #replicate_counter = replicate_counter + 1 - - - #for i, feature_df in enumerate(features): feature_df_labeled = label_feature_df_archie(feature, true_tract_labeled) - #feature_df_labeleds.append(feature_df_labeled) - - #return true_tract_labeled, feature return feature_df_labeled -global output_dir -global ref_ind_file -global tgt_ind_file -global anc_allele_file -global win_len -global win_step -global thread -global match_bonus -global archaic_prop -global 
mismatch_penalty -global max_mismatch -global process_archie -global not_archaic_prop -global seq_len - -#def store_global(output_dir, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, thread, match_bonus, archaic_prop ,mismatch_penalty, max_mismatch, process_archie, not_archaic_prop, seq_len): def store_global(output_dir_new, ref_ind_file_new, tgt_ind_file_new, anc_allele_file_new, win_len_new, win_step_new, thread_new, match_bonus_new, archaic_prop_new ,mismatch_penalty_new, max_mismatch_new, process_archie_new, not_archaic_prop_new, seq_len_new): global output_dir global ref_ind_file @@ -465,130 +127,37 @@ def store_global(output_dir_new, ref_ind_file_new, tgt_ind_file_new, anc_allele_ seq_len = seq_len_new -def _train_archie_return_df_wo_parallel(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=True, do_training = False): - - outputfolder = output_dir + "_features_df" - - if not os.path.exists(outputfolder): - os.makedirs(outputfolder) - - #set filenames for individuals, reference and target - ref_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list" - tgt_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list" - - create_ref_tgt_file(nref, ntgt, ref_ind_file, tgt_ind_file) - - anc_allele_file = None - - #set window length and stepsize - win_len = 50000 - win_step = 50000 - - #I think these parameters are NOT necessary for ArchIE - just retained for the signature of preprocess.process_data - match_bonus = 1 - max_mismatch = 1 - mismatch_penalty = 1 - - process_archie = True - - #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label) - archaic_prop = 0.7 - not_archaic_prop = 0.3 - - true_tracts = [] - - true_tracts_labeled = [] - - features = [] - - file_names = [] - - replicate_counter = 0 - - #reading of data, preprocessing - i.e., calculating statistics -, and obtaining & labeling of true tracts - for replicate, file in enumerate(os.listdir(output_dir)): - if file.endswith(".vcf"): - - filename = os.path.splitext(file)[0] - - - feature_file = os.path.splitext(file)[0]+'.features' - #computation of statistics - preprocess.process_data(os.path.join(output_dir,file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie) - - - true_tract = os.path.splitext(file)[0]+'.true.tracts.bed' - true_tracts.append ( pd.read_csv(os.path.join(output_dir, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) ) - - - #labeling of true tracts - true_tract_labeled = _label(os.path.join(output_dir, true_tract), archaic_prop, not_archaic_prop, seq_len) - if true_tract_labeled is not None: - true_tract_labeled["rep"] = replicate_counter - - true_tracts_labeled.append(true_tract_labeled) - - feature = pd.read_csv(os.path.join(output_dir, feature_file), sep="\t") - feature["rep"] = replicate_counter - - features.append(feature) - file_names.append(filename) - replicate_counter = replicate_counter + 1 - - - - - feature_df_labeleds = [] - - #Labeling of features - for i, feature_df in enumerate(features): - - feature_df_labeled = label_feature_df_archie(feature_df, true_tracts_labeled[i]) - - feature_df_labeleds.append(feature_df_labeled) - - #create one big training dataframe - train_df = pd.concat(feature_df_labeleds) - - 
-    #train_df.to_csv(os.path.join(outputfolder, "features_full.csv"))
-
-    #drop all unneccesary columns
-    train_df.drop(['rep', 'chrom', 'start', 'end', 'sample', 'interval', 'overlap', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3", "haplo"], axis=1, inplace=True, errors='ignore')
-
-    #drop_dynamic_cols indicate whether non-fixed size features should be dropped
-    if drop_dynamic_cols == True:
-        dynamic_cols = [col for col in train_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))]
-        train_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore')
-
-
-    #train_df.to_csv(os.path.join(outputfolder, "features_final.csv"))
-
-
-    if do_training == True:
-        #start training
-        scikit_file = output_prefix + ".scikit.pickle"
-        statsmodels_file = output_prefix + ".statsmodels.pickle"
-
-        #call training functions
-
-        train_statsmodels(train_df, statsmodels_file)
-
-        train_scikit(train_df, scikit_file)
-
-    return train_df
-
-
-
-def _train_archie_return_df(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=True, do_training = False):
-
-    #outputfolder = output_dir + "_features_df"
-    #if not os.path.exists(outputfolder):
-    #    os.makedirs(outputfolder)
+def _train_archie(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=False, do_training = True):
+    """
+    Description:
+    Compute windowed statistics and labels for the simulated replicate files in output_dir and train logistic regression models on them.
+
+    Arguments:
+    demo_model_file str: demographic model
+    nrep int: number of replicates
+    nref int: number of reference individuals
+    ntgt int: number of target individuals
+    ref_id str: name of reference population
+    tgt_id str: name of target population
+    src_id str: name of source population
+    seq_len int: sequence length
+    mut_rate float: mutation rate
+    rec_rate float: recombination rate
+    thread int: number of threads
+    output_prefix str: string used to determine the names of the saved logistic regression models
+    output_dir str: folder containing the simulated replicate files used for training
+    scikit bool: if True, a model using scikit-learn is trained
+    statsmodels bool: if True, a model using statsmodels is trained
+    drop_dynamic_cols bool: if True, features of non-fixed size are removed
+    do_training bool: if True, models are trained; otherwise only the statistics for training are computed
+
+    Returns:
+    train_df DataFrame: contains all windows of the training data and the corresponding information (statistics, label, ...)
+    """

    #set filenames for individuals, reference and target
    ref_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".ref.ind.list"
    tgt_ind_file = "new_sim" + "_nref" + str(nref) + "_ntgt" + str(ntgt) + ".tgt.ind.list"
-
+    #create the files according to tskit conventions
    create_ref_tgt_file(nref, ntgt, ref_ind_file, tgt_ind_file)

    anc_allele_file = None
@@ -608,14 +177,7 @@ def _train_archie_return_df(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, s
    archaic_prop = 0.7
    not_archaic_prop = 0.3

-    true_tracts = []
-
-    true_tracts_labeled = []
-
-    features = []
-
-    file_names = []
-
+    #store parameters globally so that they can be accessed from the parallelized pool
    store_global(output_dir, ref_ind_file, tgt_ind_file, anc_allele_file, win_len, win_step, thread, match_bonus, archaic_prop ,mismatch_penalty, max_mismatch, process_archie, not_archaic_prop, seq_len)

    #nur fuer repl number
@@ -624,18 +186,14 @@ def _train_archie_return_df(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, s
        if file.endswith(".vcf"):
            output_tuples.append((replicate, file))

-
-    feature_df_labeleds = []

    pool = Pool()
-
+    #call parallelized function which returns labelled training data
    feature_df_labeleds = pool.map(parallel_process_label_data_sm, output_tuples)

    #create one big training dataframe
    train_df = pd.concat(feature_df_labeleds)

-    #train_df.to_csv(os.path.join(outputfolder, "features_full.csv"))
-
    #drop all unneccesary columns
    train_df.drop(['rep', 'chrom', 'start', 'end', 'sample', 'interval', 'overlap', 'overlap_percentage', "label_one_1", "label_one_2", "label_one_3", "haplo"], axis=1, inplace=True, errors='ignore')

@@ -644,27 +202,47 @@ def _train_archie_return_df(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, s
        dynamic_cols = [col for col in train_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))]
        train_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore')

-    #train_df.to_csv(os.path.join(outputfolder, "features_final.csv"))
-
-
    if do_training == True:
        #start training
        scikit_file = output_prefix + ".scikit.pickle"
        statsmodels_file = output_prefix + ".statsmodels.pickle"

        #call training functions
-
        train_statsmodels(train_df, statsmodels_file)
-
        train_scikit(train_df, scikit_file)

    return train_df

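For orientation, a hypothetical direct call to `_train_archie`, reusing the parameter values from the `__main__` block at the end of this file; it assumes `output_dir` already contains the simulated .vcf replicates and their .true.tracts.bed files:

train_df = _train_archie(
    demo_model_file="./examples/models/BonoboGhost_4K19.yaml",
    nrep=100, nref=50, ntgt=50,
    ref_id='Western', tgt_id='Bonobo', src_id='Ghost',
    seq_len=50000, mut_rate=1e-8, rec_rate=1e-8, thread=2,
    output_prefix="train_prefix", output_dir="example_output",
    drop_dynamic_cols=False, do_training=True,
)
print(train_df['label'].value_counts())  # class balance of the labelled 50 kb windows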
+def _train_archie_folders(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=True, do_training = False):
+    """
+    Description:
+    Compute windowed statistics and labels for the simulated replicate files in all subdirectories of output_dir and optionally train logistic regression models on them.
+
+    Arguments:
+    demo_model_file str: demographic model
+    nrep int: number of replicates
+    nref int: number of reference individuals
+    ntgt int: number of target individuals
+    ref_id str: name of reference population
+    tgt_id str: name of target population
+    src_id str: name of source population
+    seq_len int: sequence length
+    mut_rate float: mutation rate
+    rec_rate float: recombination rate
+    thread int: number of threads
+    output_prefix str: string used to determine the names of the saved logistic regression models
+    output_dir str: folder with subdirectories containing the simulated replicate files used for training
+    scikit bool: if True, a model using scikit-learn is trained
+    statsmodels bool: if True, a model using statsmodels is trained
+    drop_dynamic_cols bool: if True, features of non-fixed size are removed
+    do_training bool: if True, models are trained; otherwise only the statistics for training are computed
+
+    Returns:
+    train_df DataFrame: contains all windows of the training data and the corresponding information (statistics, label, ...)
+    """

-def _train_archie_folders_return_df(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, scikit=True, statsmodels=True, drop_dynamic_cols=True, do_training = False):

    outputfolder = output_dir + "_features_df"

@@ -687,7 +265,6 @@ def _train_archie_folders_return_df(demo_model_file, nrep, nref, ntgt, ref_id, t
    match_bonus = 1
    max_mismatch = 1
    mismatch_penalty = 1
-
    process_archie = True

    #tracts with a proportion between not_archaic and archaic are labeled as ambiguous (in _label)
@@ -695,33 +272,25 @@ def _train_archie_folders_return_df(demo_model_file, nrep, nref, ntgt, ref_id, t
    not_archaic_prop = 0.3

    true_tracts = []
-
    true_tracts_labeled = []
-
    features = []
-
    file_names = []
-
    replicate_counter = 0

    #reading of data, preprocessing - i.e., calculating statistics -, and obtaining & labeling of true tracts
-
    for replicate1, folder in enumerate(os.listdir(output_dir)):
        if os.path.isdir(os.path.join(output_dir, folder)):
            for file in (os.listdir(os.path.join(output_dir, folder))):
                if file.endswith(".vcf"):

                    filename = os.path.splitext(file)[0]
-
-
                    feature_file = os.path.splitext(file)[0]+'.features'
+                    #computation of statistics
                    preprocess.process_data(os.path.join(output_dir,folder, file), ref_ind_file, tgt_ind_file, anc_allele_file, os.path.join(output_dir,folder,feature_file), win_len, win_step, thread, match_bonus, max_mismatch, mismatch_penalty, process_archie)
-
                    true_tract = os.path.splitext(file)[0]+'.true.tracts.bed'
                    true_tracts.append ( pd.read_csv(os.path.join(output_dir, folder, true_tract), sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']) )
-
                    #labeling of true tracts
                    true_tract_labeled = _label(os.path.join(output_dir, folder, true_tract), archaic_prop, not_archaic_prop, seq_len)
@@ -737,14 +306,12 @@ def _train_archie_folders_return_df(demo_model_file, nrep, nref, ntgt, ref_id, t
                    file_names.append(filename)
                    replicate_counter = replicate_counter + 1

-
    feature_df_labeleds = []

    #Labeling of features
    for i, feature_df in enumerate(features):

        feature_df_labeled = label_feature_df_archie(feature_df, true_tracts_labeled[i])
-
        feature_df_labeleds.append(feature_df_labeled)

    #create one big training dataframe
@@ -760,19 +327,15 @@ def _train_archie_folders_return_df(demo_model_file, nrep, nref, ntgt, ref_id, t
        dynamic_cols = [col for col in train_df.columns if ('-ton' in col or col.startswith("pairwised_dist"))]
        train_df.drop(dynamic_cols, axis=1, inplace = True, errors='ignore')

-    #train_df.to_csv(os.path.join(outputfolder, "features_final.csv"))
-
    if do_training == True:
        #start training
        scikit_file = output_prefix + ".scikit.pickle"
        statsmodels_file = output_prefix + ".statsmodels.pickle"

        #call training functions
-
        train_statsmodels(train_df, statsmodels_file)
-
        train_scikit(train_df, scikit_file)

    return train_df

@@ -828,7 +391,6 @@ def train_statsmodels(train_df, save_filename):


-
 def train_scikit(train_df, save_filename):
    """
    Description:
@@ -838,7 +400,7 @@ def train_scikit(train_df, save_filename):
    train_df DataFrame: Training data
    save_filename str: filename for output model
    """
-
+    import pickle
    from sklearn.linear_model import LogisticRegression

    y = train_df['label']
@@ -847,12 +409,10 @@ def train_scikit(train_df, save_filename):
    X.replace(np.nan, 0, inplace=True)
    X.replace(pd.NA, 0, inplace=True)

+    #training; currently no regularization is applied (penalty=None)
    model = LogisticRegression(solver="newton-cg", penalty=None, max_iter=1000)
-
    model.fit(X,y.astype(int))
-
-    import pickle
-
    pickle.dump(model, open(save_filename, "wb"))
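`train_scikit` stores a plain pickled scikit-learn LogisticRegression. One way such a saved model could be scored against a new feature table (illustrative only; the feature columns must be prepared exactly as during training, and the file names below are placeholders):

import pickle
import pandas as pd

with open("train_prefix.scikit.pickle", "rb") as fh:   # pattern: <output_prefix>.scikit.pickle
    model = pickle.load(fh)

new_windows = pd.read_csv("hypothetical_features.csv")        # placeholder feature table
X_new = new_windows.drop(columns=['label'], errors='ignore').fillna(0)
prob_introgressed = model.predict_proba(X_new)[:, 1]          # probability of class 1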

@@ -861,7 +421,6 @@


-
 def label_feature_df_archie(feature_df, true_tract_list, discard_ambiguous=True, replicates = True):
    """
    Description:
@@ -870,7 +429,8 @@ def label_feature_df_archie(feature_df, true_tract_list, discard_ambiguous=True,
    Arguments:
    feature_df DataFrame: (unlabeled) Training data
    true_tract_list DataFrame: true tracts to use for labeling
-    discard_ambiguous Boolean: discard tracts classified as ambiguous
+    discard_ambiguous bool: discard tracts classified as ambiguous
+    replicates bool: whether the dataset contains replicates
    """

    feature_df['label'] = 0
@@ -888,8 +448,6 @@ def label_feature_df_archie(feature_df, true_tract_list, discard_ambiguous=True,
        if replicates == True:
            replicate = int(entry[5])

-
-
        if replicates == True:
            conditions = (feature_df["sample"] == ind) & (feature_df["haplo"] == haplo) & (feature_df["rep"] == replicate)
        else:
@@ -908,11 +466,10 @@ def label_feature_df_archie(feature_df, true_tract_list, discard_ambiguous=True,


-
 def label_feature_df(feature_df, true_tract_list, only_above_threshold=False, discard_ambiguous=True, replicates=False):
    """
    Description:
-    Label data for training and compute overlap - NOT USED
+    Label data for training and compute overlap

    Arguments:
    feature_df DataFrame: (unlabeled) Training data
@@ -938,7 +495,6 @@ def label_feature_df(feature_df, true_tract_list, only_above_threshold=False, di
    label_two_index = feature_df.columns.get_loc("label_one_2")
    label_three_index = feature_df.columns.get_loc("label_one_3")

-
    overlap_index = feature_df.columns.get_loc("overlap")
    overlap_percentage_index = feature_df.columns.get_loc("overlap_percentage")
    start_index = feature_df.columns.get_loc("start")
@@ -962,7 +518,6 @@ def label_feature_df(feature_df, true_tract_list, only_above_threshold=False, di
        if replicates == True:
            replicate = int(entry[5])

-
        if replicates == True:
            conditions=np.where((feature_array[:, sample_index] == ind) & ( feature_array[:, haplo_index] == haplo) & ( feature_array[:, rep_index] == replicate))
        else:
            conditions=np.where((feature_array[:, sample_index] == ind) & ( feature_array[:, haplo_index] == haplo))
@@ -979,7 +534,6 @@ def label_feature_df(feature_df, true_tract_list, only_above_threshold=False, di
    feature_array[np.where((feature_array[:, overlap_percentage_index] < 0.3)), label_three_index] = 1

    #back to pandas df for convenience
-
    feature_df = pd.DataFrame(feature_array, columns = feature_df.columns)

    if only_above_threshold == True:
@@ -991,7 +545,6 @@ def label_feature_df(feature_df, true_tract_list, only_above_threshold=False, di
    return feature_df

 #functions for calculating overlap
-
 def getOverlap_features_tracts_np(startvalue, tract_start, tract_end, feature_start, feature_end):
    return startvalue + max(0, min(tract_end, feature_end) - max(tract_start, feature_start))

@@ -1002,31 +555,6 @@ def getOverlap_features_tracts_percentage_np(startvalue, tract_start, tract_end,
    percentage = overlap / (feature_end - feature_start)
    return percentage
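A quick worked example of the overlap arithmetic implemented by `getOverlap_features_tracts_np` / `getOverlap_features_tracts_percentage_np` above (the numbers are illustrative):

tract_start, tract_end = 20_000, 60_000        # true introgressed tract
feature_start, feature_end = 50_000, 100_000   # one 50 kb feature window

overlap = max(0, min(tract_end, feature_end) - max(tract_start, feature_start))
percentage = overlap / (feature_end - feature_start)
print(overlap, percentage)  # 10000 0.2 -> below the 0.3 cutoff referenced in label_feature_df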
-def getOverlap(tract_start, tract_end, feature_start, feature_end):
-    return max(0, min(tract_end, feature_end) - max(tract_start, feature_start))
-
-def getOverlapPercentage(tract_interval, feature_interval):
-    overlap = max(0, min(tract_interval[1], feature_interval[1]) - max(tract_interval[0], feature_interval[0]))
-    percentage = overlap / (feature_interval[1] - feature_interval[0])
-    return percentage
-
-def getOverlap_pd(tract_int, feature_int):
-    feature_start = feature_int.left
-    feature_end = feature_int.right
-    tract_start = tract_int.left
-    tract_end = tract_int.right
-    return max(0, min(feature_end, tract_end) - max(feature_start, tract_start))
-
-def getOverlapPercentage_pd(tract_int, feature_int):
-    feature_start = feature_int.left
-    feature_end = feature_int.right
-    tract_start = tract_int.left
-    tract_end = tract_int.right
-    overlap = max(0, min(feature_end, tract_end) - max(feature_start, tract_start))
-    percentage = overlap / (feature_end - feature_start)
-    return percentage
-
-
 def _simulation_manager(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, seed):
    """
    """
@@ -1099,7 +627,6 @@ def _simulation_worker_folders(in_queue, out_queue, demography, samples, tgt_id,
    """

-
    while True:
        rep = in_queue.get()
        ts = msprime.sim_ancestry(
@@ -1111,22 +638,12 @@ def _simulation_worker_folders(in_queue, out_queue, demography, samples, tgt_id,
            random_seed=seed,
        )

-        #for var in ts.variants():
-            #print(var.site.position, var.alleles, var.genotypes, sep="\t")
-            #print(np.unique(var.alleles))
-            #print(np.unique(var.genotypes))

-        #ts = msprime.sim_mutations(ts, rate=mut_rate, random_seed=seed)
+        #use BinaryMutationModel (0/1 alleles)
        ts = msprime.sim_mutations(ts, rate=mut_rate, random_seed=seed, model=msprime.BinaryMutationModel())

-        #for var in ts.variants():
-            #print(var.site.position, var.alleles, var.genotypes, sep="\t")
-            #print(np.unique(var.alleles))
-            #print(np.unique(var.genotypes))
-
        true_tracts = _get_true_tracts(ts, tgt_id, src_id)
-
        os.makedirs(os.path.join(output_dir, str(rep)), exist_ok=True)
        ts.dump(output_dir+'/'+ str(rep) + '/' + output_prefix+f'{rep}.ts')

@@ -1141,8 +658,6 @@ def _simulation_worker_folders(in_queue, out_queue, demography, samples, tgt_id,

        df.drop_duplicates(keep='first').to_csv(output_dir+'/'+ str(rep) + '/' + output_prefix+f'{rep}.true.tracts.bed', sep="\t", header=False, index=False)

-
-
        out_queue.put(rep)

 def _simulation_worker(in_queue, out_queue, demography, samples, tgt_id, src_id, seq_len, mut_rate, rec_rate, output_prefix, output_dir, seed):
@@ -1150,7 +665,6 @@ def _simulation_worker(in_queue, out_queue, demography, samples, tgt_id, src_id,
    """

-    #global df
    while True:
        rep = in_queue.get()

@@ -1163,18 +677,10 @@ def _simulation_worker(in_queue, out_queue, demography, samples, tgt_id, src_id,
            random_seed=seed,
        )

-        #for var in ts.variants():
-            #print(var.site.position, var.alleles, var.genotypes, sep="\t")
-            #print(np.unique(var.alleles))
-            #print(np.unique(var.genotypes))

        #ts = msprime.sim_mutations(ts, rate=mut_rate, random_seed=seed)
+        # BinaryMutationModel (0/1 alleles)
        ts = msprime.sim_mutations(ts, rate=mut_rate, random_seed=seed, model=msprime.BinaryMutationModel())
-
-        #for var in ts.variants():
-            #print(var.site.position, var.alleles, var.genotypes, sep="\t")
-            #print(np.unique(var.alleles))
-            #print(np.unique(var.genotypes))

        true_tracts = _get_true_tracts(ts, tgt_id, src_id)

@@ -1190,8 +696,6 @@ def _simulation_worker(in_queue, out_queue, demography, samples, tgt_id, src_id,

        df.drop_duplicates(keep='first').to_csv(output_dir+'/'+output_prefix+f'{rep}.true.tracts.bed', sep="\t", header=False, index=False)

-
-
        out_queue.put(rep)

@@ -1267,20 +771,26 @@ def _add_label(row, archaic_prop, not_archaic_prop):
    df['prop'] = df['len'] / seq_len
    df['label'] = df.apply(lambda row: _add_label(row, archaic_prop, not_archaic_prop), axis=1)

-    #sep="\t", header=None, names=['chr', 'start', 'end', 'hap', 'ind']
    return df
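The body of `_add_label` lies outside this diff. Based on the thresholds passed in by the callers (archaic_prop=0.7, not_archaic_prop=0.3) and the comment that tracts with a proportion between the two are labeled as ambiguous, a plausible sketch of the rule is the following hypothetical reimplementation (the real function may differ):

def add_label_sketch(prop, archaic_prop=0.7, not_archaic_prop=0.3):
    if prop > archaic_prop:
        return 1             # haplotype treated as introgressed
    if prop < not_archaic_prop:
        return 0             # haplotype treated as non-introgressed
    return "ambiguous"       # in between; dropped when discard_ambiguous=True

print(add_label_sketch(0.85), add_label_sketch(0.10), add_label_sketch(0.50))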

-
-
-def train_parameters_archienew_fin(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir):
-
-    preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)
-
-    train(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, train_archie=True)
-
-
 if __name__ == '__main__':

    outputfolder = "example_output"

-    train("./examples/models/BonoboGhost_4K19.yaml", 100, 50, 50, 'Western', 'Bonobo', 'Ghost', 50000, 1e-8, 1e-8, 2, 'testa', outputfolder, train_archie=True)
+    demo_model_file = "./examples/models/BonoboGhost_4K19.yaml"
+    nrep = 100
+    nref = 50
+    ntgt = 50
+    ref_id = 'Western'
+    tgt_id = 'Bonobo'
+    src_id = 'Ghost'
+    seq_len = 50000
+    mut_rate = 1e-8
+    rec_rate = 1e-8
+    thread = 2
+    output_prefix = "train_prefix"
+    output_dir = outputfolder
+    train_archie = True
+    preprocess.store_global_parameters(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir)
+
+    train(demo_model_file, nrep, nref, ntgt, ref_id, tgt_id, src_id, seq_len, mut_rate, rec_rate, thread, output_prefix, output_dir, train_archie)