From 480b05e1421fba24d64d44f77fce4b739c76c7a8 Mon Sep 17 00:00:00 2001 From: Joe Zhou Date: Mon, 15 Jun 2020 15:39:06 -0400 Subject: [PATCH] debug --- cnvlib/export.py | 5 +- cnvlib/scatter.py | 89 ++++++++++++++++++++++++++++++++- cnvlib/segmentation/__init__.py | 28 ++++++----- 3 files changed, 106 insertions(+), 16 deletions(-) diff --git a/cnvlib/export.py b/cnvlib/export.py index 50552656..f405188b 100644 --- a/cnvlib/export.py +++ b/cnvlib/export.py @@ -263,7 +263,7 @@ def assign_ci_start_end(segarr, cnarr): def segments2vcf(segments, ploidy, is_reference_male, is_sample_female): """Convert copy number segments to VCF records.""" - out_dframe = segments.data.loc[:, ["chromosome", "end", "log2", "probes"]] + out_dframe = segments.data.loc[:, ["chromosome", "end", "log2", "probes", "aberrant_cell_frac"]] out_dframe["start"] = segments.start.replace(0, 1) if "cn" in segments: @@ -326,7 +326,8 @@ def segments2vcf(segments, ploidy, is_reference_male, is_sample_female): "SVLEN=%d" % out_row.svlen, "FOLD_CHANGE=%f" % 2.0 ** out_row.log2, "FOLD_CHANGE_LOG=%f" % out_row.log2, - "PROBES=%d" % out_row.probes + "PROBES=%d" % out_row.probes, + "FRAC=%f" % out_row.aberrant_cell_frac ] if has_ci: fields.extend([ diff --git a/cnvlib/scatter.py b/cnvlib/scatter.py index 654aacfa..1cb21ac7 100644 --- a/cnvlib/scatter.py +++ b/cnvlib/scatter.py @@ -153,13 +153,14 @@ def cnv_on_genome(axis, probes, segments, do_trend=False, y_min=None, if has_subclones: is_subclone = 1 if seg.aberrant_cell_frac != clone_frac else 0 color = choose_segment_color(seg, segment_color) if not is_subclone else SUBCLONE_COLOR + print(chrom, seg.end-seg.start, color) axis.plot((seg.start + x_offset, seg.end + x_offset), (max(y_min+0.1, seg.log2), max(y_min+0.1, seg.log2)), color=color, linewidth=3, solid_capstyle='round') if is_subclone: cn_state = str(int(seg.cn1)) + "+" + str(int(seg.cn2)) text = f"{int(seg.aberrant_cell_frac*100)}% {cn_state}" - axis.text(seg.start+x_offset, max(y_min+0.1, seg.log2+0.01), text) + axis.text(seg.start+x_offset, max(y_min+0.1, seg.log2+0.01), text, fontsize=7) else: color = choose_segment_color(seg, segment_color) axis.plot((seg.start + x_offset, seg.end + x_offset), @@ -167,6 +168,92 @@ def cnv_on_genome(axis, probes, segments, do_trend=False, y_min=None, color=color, linewidth=3, solid_capstyle='round') return axis + +def my_cnv_on_genome(axis, probes, segments, do_trend=False, y_min=None, + y_max=None, segment_color=SEG_COLOR): + """Plot bin ratios and/or segments for all chromosomes on one plot.""" + # Configure axes etc. + patches = [ + mpatches.Patch(color='darkorange', label='clonal'), + mpatches.Patch(color=CNLOH_COLOR, label='clonal CN-LOH'), + mpatches.Patch(color=SUBCLONE_COLOR, label='subclonal'), + ] + axis.legend(handles=patches, loc="upper right") + axis.axhline(color='k') + axis.set_ylabel("Copy ratio (log2)") + has_subclones = False + if 'aberrant_cell_frac' in segments.data.columns: + has_subclones = True + clone_frac = max(segments['aberrant_cell_frac']) + segments['aberrant_cell_frac'] + if not (y_min and y_max): + if segments: + # Auto-scale y-axis according to segment mean-coverage values + # (Avoid spuriously low log2 values in HLA and chrY) + low_chroms = segments.chromosome.isin(('6', 'chr6', 'Y', 'chrY')) + seg_auto_vals = segments[~low_chroms]['log2'].dropna() + if not y_min: + y_min = (np.nanmin([seg_auto_vals.min() - .2, -1.5]) + if len(seg_auto_vals) else -2.5) + if not y_max: + y_max = (np.nanmax([seg_auto_vals.max() + .2, 1.5]) + if len(seg_auto_vals) else 2.5) + else: + if not y_min: + y_min = -2.5 + if not y_max: + y_max = 2.5 + axis.set_ylim(y_min, y_max) + + # Group probes by chromosome (to calculate plotting coordinates) + if probes: + chrom_sizes = plots.chromosome_sizes(probes) + chrom_probes = dict(probes.by_chromosome()) + # Precalculate smoothing window size so all chromosomes have similar + # degree of smoothness + # NB: Target panel has ~1k bins/chrom. -> 250-bin window + # Exome: ~10k bins/chrom. -> 2500-bin window + window_size = int(round(.15 * len(probes) / + probes.chromosome.nunique())) + else: + chrom_sizes = plots.chromosome_sizes(segments) + # Same for segment calls + chrom_segs = dict(segments.by_chromosome()) if segments else {} + + # Plot points & segments + x_starts = plots.plot_x_dividers(axis, chrom_sizes) + for chrom, x_offset in x_starts.items(): + if probes and chrom in chrom_probes: + subprobes = chrom_probes[chrom] + x = 0.5 * (subprobes['start'] + subprobes['end']) + x_offset + axis.scatter(x, subprobes['log2'], marker='.', + color=POINT_COLOR, edgecolor='none', alpha=0.2) + if do_trend: + # ENH break trendline by chromosome arm boundaries? + axis.plot(x, subprobes.smooth_log2(), #window_size), + color=POINT_COLOR, linewidth=2, zorder=-1) + + if chrom in chrom_segs: + for seg in chrom_segs[chrom]: + if has_subclones: + is_subclone = 1 if seg.aberrant_cell_frac != clone_frac else 0 + color = choose_segment_color(seg, segment_color) if not is_subclone else SUBCLONE_COLOR + print(chrom, seg.end-seg.start, color) + axis.plot((seg.start + x_offset, seg.end + x_offset), + (max(y_min+0.1, seg.log2), max(y_min+0.1, seg.log2)), + color=color, linewidth=3, solid_capstyle='round') + if is_subclone: + cn_state = str(int(seg.cn1)) + "+" + str(int(seg.cn2)) + text = f"{int(seg.aberrant_cell_frac*100)}% {cn_state}" + axis.text(seg.start+x_offset, max(y_min+0.1, seg.log2+0.01), text, fontsize=7) + else: + color = choose_segment_color(seg, segment_color) + axis.plot((seg.start + x_offset, seg.end + x_offset), + (max(y_min+0.1, seg.log2), max(y_min+0.1, seg.log2)), + color=color, linewidth=3, solid_capstyle='round') + return axis + + def snv_on_genome(axis, variants, chrom_sizes, segments, do_trend, segment_color): """Plot a scatter-plot of SNP chromosomal positions and shifts.""" axis.set_ylim(0.0, 1.0) diff --git a/cnvlib/segmentation/__init__.py b/cnvlib/segmentation/__init__.py index f1497124..891dafa5 100644 --- a/cnvlib/segmentation/__init__.py +++ b/cnvlib/segmentation/__init__.py @@ -91,8 +91,8 @@ def _ds(args): def _insert_bp(segarr, chrom, bp): df = segarr.reset_index() - df = df[df['chromosome']==chrom] - for i, row in df.iterrows(): + _df = df[df['chromosome']==chrom] + for i, row in _df.iterrows(): if (row['start']bp): new_row = row.copy() old_end = row['end'] @@ -208,17 +208,19 @@ def _do_segmentation(cnarr, method, threshold, variants=None, # seg_end = sys.maxsize if n == (len(segs)-1) else seg[1] seg_start = seg[0] seg_end = seg[1] - _cns = cns[cns['chromosome']==str(chrom)] - if len(_cns) == 0: - continue - seg_start_closest = min(list(_cns['start']), key=lambda x:abs(x-seg_start)) - seg_end_closest = min(list(_cns['end']), key=lambda x:abs(x-seg_end)) - seg_start_dist = abs(seg_start_closest - seg_start) - seg_end_dist = abs(seg_end_closest - seg_end) - if seg_start_dist>min_dist: - new_bps.append((chrom, seg_start)) - if seg_end_dist>min_dist: - new_bps.append((chrom, seg_end)) + # _cns = cns[cns['chromosome']==str(chrom)] + # if len(_cns) == 0: + # continue + # seg_start_closest = min(list(_cns['start']), key=lambda x:abs(x-seg_start)) + # seg_end_closest = min(list(_cns['end']), key=lambda x:abs(x-seg_end)) + # seg_start_dist = abs(seg_start_closest - seg_start) + # seg_end_dist = abs(seg_end_closest - seg_end) + # if seg_start_dist>min_dist: + # new_bps.append((chrom, seg_start)) + # if seg_end_dist>min_dist: + # new_bps.append((chrom, seg_end)) + new_bps.append((chrom, seg_start)) + new_bps.append((chrom, seg_end)) if len(new_bps) != 0: for s in new_bps: chrom = s[0]