#!/usr/bin/R # ----------------------------------------------- # Plot stats for fusions in the CKP25 mouse data, # stratified by cohesin genes and by DMG genes # Updated: 08/28/2022 # ----------------------------------------------- library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(ggplot2) library(ggpubr) library(ggbeeswarm) library(tidyr) library(scales) library(RColorBrewer) library(viridis) library(ComplexHeatmap) options(width=170) source(paste0(sbindir, 'CKP25/load_metadata.R')) source(paste0(sbindir, 'CKP25/load_counts.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Directories: imgdir = paste0(img, 'CKP25/') imgdir2 = paste0(imgdir, 'fusions/') imgpref = paste0(imgdir2, 'fusions_') cmd = paste('mkdir -p',imgdir, imgdir2) system(cmd) source(paste0(sbindir, 'CKP25/analysis/load_basic_fusions.R')) source(paste0(sbindir, 'CKP25/analysis/load_umap_basis.R')) source(paste0(sbindir, 'CKP25/analysis/load_go_signatures.R')) load.colors() # Score GO and signature gene sets: # --------------------------------- filepref = paste0(dbdir, 'CKP25/CKp25_annotation_scores') enr.rds = paste0(filepref, '_broad.Rds') enr.tsv = paste0(filepref, '_broad.tsv.gz') fullenr.rds = paste0(filepref, '_specific.Rds') kwds = names(siglist) if (!file.exists(enr.rds)){ marg = marg[colnames(amat)] enrdf = c() full.enrdf = c() for (kwd in kwds){ # Overall score for all genes in gene set: genes = siglist[[kwd]] ngenes = length(genes) genes = genes[genes %in% rownames(amat)] cs = log(10000 * colSums(amat[genes,]) / marg / ngenes + 1) enrdf = rbind(enrdf, data.frame(cell=names(cs), enr=cs, tag=kwd, ng=ngenes)) if (kwd %in% names(full.siglist)){ lset = full.siglist[[kwd]] for (term in names(lset)){ cs = log(10000 * colSums(amat[lset[[term]],]) / marg + 1) full.enrdf = rbind(full.enrdf, data.frame( cell=names(cs), enr=cs, tag=kwd, term=term)) } } } enrdf = merge(enrdf, smeta) full.enrdf = merge(full.enrdf, smeta) # Save terms: saveRDS(enrdf, file=enr.rds) write.table(enrdf, gzfile(enr.tsv), quote=F, row.names=F, sep="\t") saveRDS(full.enrdf, file=fullenr.rds) } else { enrdf = readRDS(enr.rds) full.enrdf = readRDS(fullenr.rds) } # Plot each term on the UMAP: # --------------------------- col_fun = function(x, pal=colrb, mx=2.5, rescale=FALSE){ palette = rev(pal) if (rescale){ x = scale(x) } x[x > mx] = mx x[x < -mx] = -mx bin <- cut(x, seq(-mx, mx, length.out=length(palette)), include.lowest=T) palette[bin] } for (kwd in kwds){ print(kwd) subdf = enrdf[enrdf$tag == kwd,] kwdstr = gsub(" ", "_", kwd) pltpref = paste0(imgpref, 'annotation_scores_umap_', kwdstr) png(paste0(pltpref, '.png'),res=300, width=3,height=3, units='in') par(mar=c(0,0,0,0)) plot(subdf[[u1]], subdf[[u2]], pch=19, col=col_fun(subdf$enr, pal=colrb, rescale=TRUE), axes=F, ylab='', xlab='', cex=.5) dev.off() # With updated coordinates for Dileep paper png(paste0(pltpref, '_V1V2.png'),res=300, width=3,height=3, units='in') par(mar=c(0,0,0,0)) plot(subdf$V1, subdf$V2, pch=19, col=col_fun(subdf$enr, pal=colrb, rescale=TRUE), axes=F, ylab='', xlab='', cex=.5) dev.off() } # Plot corresponding legend: # -------------------------- r.legend = Legend(at=seq(-2.5,2.5, 1), labels_gp = gpar(fontsize=5), title_gp = gpar(fontsize=5, fontface='bold'), col_fun=col_fun, title_position = "topleft", background='black', legend_height=unit(.25,'in'), legend_width=unit(1.25,'in'), title='z-scored expr.', direction = 'horizontal') rlegend = packLegend(r.legend) pdf(paste0(imgpref, 'annotation_score_umap_legend.pdf'), width=2, height=2) draw(rlegend, x = unit(.90,'in'), y=unit(.6,'in'), just = "top") dev.off() # Plot relative enrichment of different subtypes: # ----------------------------------------------- subdf = enrdf[enrdf$label %in% c('Excitatory','Stage 2'),] runcomp <- list(c("Ex0", "Ex2"), c("Ex0", "Ex3"), c("Ex0", "St.2"), c('Ex2', 'Ex3'), c('Ex2', 'St.2'), c('Ex3', 'St.2')) gplot = ggplot(subdf, aes(short.sublabel, enr, color=sublabel, fill=sublabel)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_color_manual(values=sublabel.col) + scale_fill_manual(values=sublabel.col) + geom_violin(fill=NA) + geom_quasirandom(varwidth=TRUE, size=.3) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) pltpref = paste0(imgpref, 'signatures_terms_expressionlevels_violin') ggsave(paste0(pltpref, '.png'), gplot, dpi=450, units='in', width=9, height=4) ggsave(paste0(pltpref, '.pdf'), gplot, dpi=450, units='in', width=9, height=4) # Merge at the individual level and plot individual-level boxplots: # ----------------------------------------------------------------- subdf = enrdf[enrdf$label %in% c('Excitatory', 'Stage 2'),] aggdf = aggregate(enr ~ mouse_id + sublabel + label + short.sublabel + tag + genotype, subdf, mean) aggdf$short.sublabel = factor(aggdf$short.sublabel, levels=c('In0','In1','Ex0','Ex1','Ex2','Ex3','St.2')) gplot = ggplot(aggdf, aes(short.sublabel, enr, fill=sublabel)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_fill_manual(values=sublabel.col) + geom_boxplot(outlier.shape=NA, alpha=1, lwd=.5) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) pltpref = paste0(imgpref, 'signatures_terms_expressionlevels_indiv_violin') ggsave(paste0(pltpref, '.png'), gplot, dpi=450, units='in', width=9, height=4) ggsave(paste0(pltpref, '.pdf'), gplot, dpi=450, units='in', width=9, height=4) # Repeat just for the double-strand break pathways # ----------------------------------------------------------------- subdf = full.enrdf[full.enrdf$label %in% c('Excitatory', 'Stage 2') & full.enrdf$tag == 'double-strand break repair',] aggdf = aggregate(enr ~ mouse_id + sublabel + label + short.sublabel + term + genotype, subdf, mean) aggdf$termname = termmap[aggdf$term] rep.terms = list('double-strand break repair'='DSB repair', 'nonhomologous end joining'='NHEJ', 'via'='via', 'homologous recombination'='HR', 'regulation'='reg.', 'positive'='pos.', 'negative'='neg.') for (i in 1:length(rep.terms)){ aggdf$termname = sub(names(rep.terms)[i], rep.terms[i], aggdf$termname) } aggdf$short.sublabel = factor(aggdf$short.sublabel, levels=c('In0','In1','Ex0','Ex1','Ex2','Ex3','St.2')) gplot = ggplot(aggdf, aes(short.sublabel, enr, fill=sublabel)) + facet_wrap(~termname, nrow=2, scale='free_y') + scale_fill_manual(values=sublabel.col) + geom_boxplot(outlier.shape=NA, alpha=1, lwd=.5) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', hide.ns=TRUE, bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) pltpref = paste0(imgpref, 'signatures_DSB_terms_expressionlevels_indiv_violin') ggsave(paste0(pltpref, '.png'), gplot, dpi=450, units='in', width=10, height=5) ggsave(paste0(pltpref, '.pdf'), gplot, dpi=450, units='in', width=10, height=5) # Plot the specific pathways for each kwd: # ---------------------------------------- load.colors() rownames(meta) = meta$cell subdf = full.enrdf[full.enrdf$label %in% c('Excitatory', 'Stage 2'),] for (kwd in kwds){ kwddf = subdf[subdf$tag == kwd,] kwddf$termname = termmap[kwddf$term] kmat = pivot.tomatrix(kwddf[,c('enr','termname','cell')], 'cell','enr') ctlab = meta[colnames(kmat), 'sublabel2'] ha = HeatmapAnnotation( Label=ctlab, Genotype=meta[colnames(kmat), 'genotype'], Mouse=meta[colnames(kmat), 'mouse_id'], col=list( Label=sublabel.col, Genotype=c('CK'='grey80', 'CKp25'='indianred'))) # pltmat = sweep(kmat, 1, apply(kmat, 1, max), '/') # NOTE: double-strand break repair is notable (break-induced replication) pltmat = t(scale(t(kmat))) Heatmap(pltmat, top_annotation = ha, show_column_names=FALSE, column_split = ctlab ) } # Correlations of these scores (nothing notable), driven by ovl. mostly: emat = pivot.tomatrix(enrdf[,c('enr','tag','cell')], 'cell', 'enr') ht = Heatmap(cor(t(emat)), col=rev(colspec), show_column_names=FALSE, ) # Fusions versus each of the overall scores: # ------------------------------------------ # Merge the fusion and GO expression data first ind = sample(1:nrow(enrdf), nrow(enrdf), replace=FALSE) enrdf = enrdf[ind,] # enrdf$has_intra = enrdf$cell %in% intradf$cell[intradf$close.fusion] enrdf$has_intra = enrdf$cell %in% intradf$cell enrdf$has_inter = enrdf$cell %in% interdf$cell enrdf$event_count = (enrdf$has_intra + 2 * enrdf$has_inter) enrdf = enrdf[order(enrdf$event_count, decreasing=F),] enrdf$has_event = enrdf$event_count > 0 enrdf$event_type = 'None' enrdf$event_type[enrdf$event_count == 1] = 'Intra' enrdf$event_type[enrdf$event_count == 2] = 'Inter' enrdf$event_type[enrdf$event_count == 3] = 'Both' subdf = enrdf[,c('has_intra', 'has_inter','cell','enr','tag')] subdf = gather(subdf, type, value, -cell, -enr, -tag) subdf$type = ifelse(subdf$type == 'has_intra', 'Intra', 'Inter') # For each, stratify by fusions - diff in expr. gplot = ggplot(subdf, aes(type, enr, fill=value)) + facet_wrap(~ tag, nrow=1, scale='free_y') + scale_fill_manual(values=c('grey80', 'grey50'), name='Has event') + geom_boxplot(outlier.size=.8) + labs(x=paste('Type of putative fusion event'), y='Norm. signature expression, log(X+1)', title='GO signatures in Excitatory/Stage 2 cells, stratified by presence of fusion event') + theme_pubr() + theme() + stat_compare_means(label='p.signif', method='t.test', vjust=.5, hjust=.5) pltpref = paste0(imgpref, 'signatures_terms_vs_fusionevents_boxplot') ggsave(paste0(pltpref, '.png'), gplot, dpi=450, units='in', width=15, height=4) ggsave(paste0(pltpref, '.pdf'), gplot, dpi=450, units='in', width=15, height=4) # Plot terms as a heatmap, annotate with # fusions: # ------------------------------------------------- subdf = enrdf[enrdf$label %in% c('Excitatory','Stage 2'),] subdf = subdf[subdf$tag %in% subkwds,] mat = pivot.tomatrix(subdf[,c('cell','tag','enr')], 'cell','enr') evdf = unique(enrdf[,c('cell', 'has_intra', 'has_inter','event_type')]) evdf$has_fusion = evdf$has_intra | evdf$has_inter rownames(evdf) = evdf$cell ux = 1.5 ctlab = meta[colnames(mat), 'sublabel2'] ha = HeatmapAnnotation( Label=ctlab, Genotype=meta[colnames(mat), 'genotype'], HasFusion=ifelse(evdf[colnames(mat), 'has_intra'], 'Yes','No'), annotation_name_gp = gpar(fontsize=5), simple_anno_size = unit(ux / 1.2, 'mm'), gap = unit(0, "mm"), col=list( Label=sublabel.col, HasFusion=c('No'='white', 'Yes'='black'), Genotype=c('CK'='grey80', 'CKp25'='indianred'))) set.seed(1) pltmat = t(scale(t(mat))) ht = Heatmap(pltmat, name='z-score', top_annotation=ha, show_column_names=FALSE, use_raster=FALSE, column_split=ctlab, width = ncol(pltmat)*unit(ux / 10, "mm"), height = nrow(pltmat)*unit(ux, "mm"), border_gp=gpar(color='black', lwd=.5), ) h = 2 + 1 / 15 * nrow(pltmat) w = 2 + 1 / 15 * ncol(pltmat) / 10 pltpref = paste0(imgpref, 'annotation_scores_heatmap') saveHeatmap(ht, pltpref, h=h, w=w) # Plot the scores + fusions on the pseudotime: # -------------------------------------------- # Load in pseudotime: dbpref = paste0(dbdir, 'CKP25/split_trajectory') scefit.rda = paste0(dbpref,'_dpt_GAMfit_dataset.Rda') load(scefit.rda) mo = attr(sce$tradeSeq$X, 'model.offset') dptdf = data.frame( dpt=sce$slingshot$pseudotime.V1, cell=names(mo)) # Merged dataframe subdf = enrdf[enrdf$tag %in% subkwds ,c('cell','enr','tag')] subev = evdf[,c('cell', 'has_fusion')] names(subev)[2] = 'enr' subev$enr = subev$enr * 1 subev$tag = 'has_fusion' subdf = rbind(subdf, subev) subdf = merge(subdf, dptdf) gplot = ggplot(subdf, aes(dpt, enr, color=tag)) + facet_wrap(~tag, scales='free_y') + geom_point(cex=.5) + geom_smooth(method='gam') + labs(y=paste0('log10(Expr)'), x='Diffusion Pseudotime') + theme_pubr() ggsave(paste0(imgpref, 'annotation_scores_dpt.png'), gplot, dpi=400, units='in', width=8, height=9) ggsave(paste0(imgpref, 'annotation_scores_dpt.pdf'), gplot, dpi=400, units='in', width=8, height=9) # Reduced: gplot = ggplot(subdf, aes(dpt, enr, color=tag)) + geom_smooth(method='gam') + labs(y=paste0('log10(Expr)'), x='Diffusion Pseudotime') + scale_x_continuous(expand=c(0,0)) + theme_pubr() + theme(legend.position='right') ggsave(paste0(imgpref, 'annotation_scores_dpt_joint.png'), gplot, dpi=400, units='in', width=4.75, height=3) ggsave(paste0(imgpref, 'annotation_scores_dpt_joint.pdf'), gplot, dpi=400, units='in', width=4.75, height=3) # Perform regressions and plot coefficients by set: # ------------------------------------------------- subdf = enrdf[enrdf$label %in% c('Excitatory', 'Stage 2'),] form = 'has_event ~ enr + n_counts + n_genes + genotype + timepoint' form = as.formula(form) resdf = c() for (kwd in kwds){ df = subdf[subdf$tag == kwd,] fit = lm(form, df) cfit = processLmFit(fit) cfit$tag = kwd resdf = rbind(resdf, cfit[grep('enr', cfit$coef),]) } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) resdf = resdf[order(resdf$Est),] resdf$tag = factor(resdf$tag, levels=resdf$tag) # Plot all of these effects jointly: # ---------------------------------- kwddf = resdf[resdf$tag %in% subkwds,] pcols = brewer.pal(12, 'Paired') devals = c('NS'='grey80', 'Down'=pcols[2], 'Up'=pcols[6]) gp = ggplot(kwddf, aes(tag, Est, ymin=Est-2*SE, ymax=Est+2*SE, label=p, color=factor(col))) + geom_point() + geom_errorbar(width=.2) + geom_hline(yintercept=0, lty='dotted') + scale_color_manual(values=devals, name='Significance:') + theme_pubr() + coord_flip() + labs(x='Condition', y='Estimated effect size') regpref = paste0(imgpref, 'p25_reg_effectsizes_sigs_') saveGGplot(gp, paste0(regpref, 'errorbar'), w=4.5, h=3)