#!/usr/bin/R # --------------------------------------------------------------- # Check robustness of effects from multiple different directions: # NOTE: Hasn't been updated since changes in reviews # Updated: 08/31/22 # --------------------------------------------------------------- library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') # Libraries: library(tidyr) library(ggplot2) library(ggpubr) library(RColorBrewer) options(width=170) # Make directories for analysis: project = 'ALZ' analysis = 'fusion' imgdir = paste0(img, project, '/', analysis , '/') imgpref = paste0(imgdir, analysis, "_mspred_", project, '_check_') cmd = paste0('mkdir -p ', imgdir) system(cmd) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Load in the two cohorts of candidate gene fusion counts: # -------------------------------------------------------- source(paste0(bindir, 'mosaicism/genefusions/load_fusion_data.R')) source(paste0(bindir, 'mosaicism/genefusions/auxiliary_fusion_plotting.R')) # countcutoff = 25000 countcutoff = 0 if (countcutoff > 0){ imgpref = paste0(imgpref, "nreads",countcutoff, "_") sub.ctdf = sub.ctdf[sub.ctdf$n_counts >= countcutoff,] } # Select cohort for tests: platforms = c('40SE', '150PE', '250PE', 'Joint') for (platform in platforms){ if (platform == 'Joint'){ use.ctdf = sub.ctdf } else { use.ctdf = sub.ctdf[sub.ctdf$platform == platform,] } if (length(unique(use.ctdf$platform)) > 1){ count.covar = '+ platform * (n_counts + n_genes + count_per_gene)' } else { count.covar = '+ n_counts + n_genes + count_per_gene' } # Run regressions: # ---------------- fulldf = c() tfvec = c(TRUE, FALSE) for (reg.var in reg.vars){ for (use.cc in tfvec){ for (use.ec in tfvec){ for (only.exc in tfvec){ if (only.exc){ df = use.ctdf[use.ctdf$relabel == 'Excitatory',] } else { df = use.ctdf } resdf = c() for (var in test.vars){ # Build formula: form.vec = c(reg.var, ' ~ ', var) if (!only.exc){ form.vec = c(form.vec, ' + relabel') } if (use.cc){ form.vec = c(form.vec, count.covar) } if (use.ec){ form.vec = c(form.vec, ext.covar) } form = asform(form.vec) # Fit model + process fit: if (reg.var %in% c('capped_count','count')){ fit = glm(form, df, family='poisson') } else if (reg.var %in% c('has_fusion')){ fit = glm(form, df, family='binomial') } else { fit = glm(form, df, family='gaussian') } cfit = processLmFit(fit) cfit$var = var resdf = rbind(resdf, cfit[2,]) } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) resdf = resdf[order(resdf$Est),] resdf$coef = varmap[resdf$coef] resdf$coef = factor(resdf$coef, levels=resdf$coef) # Merge with results: resdf$reg.var = reg.var resdf$use.cc = ifelse(use.cc, 'countcovar', '') resdf$use.ec = ifelse(use.ec, 'extcovar', '') resdf$set = ifelse(only.exc, 'Exc. neurons', 'All cells') fulldf = rbind(fulldf, resdf) } } } } # Plot effects as heatmap: # ------------------------ fulldf$desc = with(fulldf, paste0(reg.var, ':', use.cc, '_', use.ec)) subdf = fulldf ind = subdf$reg.var %in% c('capped_rate','rate') subdf$Est[ind] = subdf$Est[ind] * median(sub.ctdf$n_counts) # Adjust subdf$cs = paste0(substr(subdf$set, 1, 3), ': ', subdf$coef) emat = pivot.tomatrix(subdf[,c('Est', 'cs', 'desc')], 'cs','Est') pmat = pivot.tomatrix(subdf[,c('padj', 'cs', 'desc')], 'cs','padj') mx = max(abs(emat)) mat.col_fun = colorRamp2(c(-mx, 0, mx), c("blue", "white", "red")) ux = 1.5 ht = Heatmap(emat, name='Est', col=mat.col_fun, width = ncol(emat)*unit(1.65 * ux, "mm"), height = nrow(emat)*unit(ux, "mm"), cluster_rows=FALSE, cluster_columns=FALSE, border_gp = gpar(lwd=.5, color='black'), column_split = sub(": .*", "", colnames(emat)), row_split = ifelse(1:nrow(emat) %in% grep('rate', rownames(emat)), 'Rate', 'Count'), cell_fun = function(j, i, x, y, w, h, col){ # Add the p-value text p = pmat[i,j] ann = ifelse(p < 0.05, ifelse(p < 0.01, ifelse(p < 0.001, '***','**'),'*'),'') grid.text(ann, x, y, gp=gpar(fontsize=5), vjust=.75)}) pltprefix = paste0(imgpref, 'robustness_test_heatmap_', platform) w = 2.5 + ncol(emat) / 15 h = 2 + nrow(emat) / 15 saveHeatmap(ht, pltprefix, w=w, h=h) } # Plot diff. variables vs. rate, without corrections: # --------------------------------------------------- uqvals = c('CTRL'='royalblue', colvals[['cogdx.ad']], '0'='grey80','1'='slateblue3', 'No'='grey80','Yes'='slateblue3') split.vars = c('cogdx.ad','bd','nrad','msex','Apoe_e4') for (reg.var in c('count','rate','capped_count', 'capped_rate')){ sub.ctdf$regvar = sub.ctdf[[reg.var]] ctlong = gather(sub.ctdf[,c('sample', 'regvar', 'platform', split.vars)], var, level, -sample, -regvar, -platform) if (reg.var %in% c('rate','capped_rate')){ ctlong$regvar = ctlong$regvar + 1e-5 } meandf = aggregate(regvar ~ platform + level + var, ctlong, mean) gp = ggplot(ctlong, aes(platform, regvar, fill=level)) + facet_wrap(~var, scales='free', ncol=1) + geom_boxplot(outlier.size=.5) + stat_compare_means(label='p.signif') + geom_point(data=meandf, aes(platform, regvar, fill=level), pch=24, cex=3) + labs(x='Group', y=reg.var) + scale_fill_manual(values=uqvals) + scale_color_manual(values=uqvals) + theme_pubr() + coord_flip() if (reg.var %in% c('rate','capped_rate')){ gp = gp + scale_y_log10() } pltprefix = paste0(imgpref, 'robustness_test_boxplots_', reg.var) saveGGplot(gp, pltprefix, w=5, h=8) } for (reg.var in c('count','rate','capped_count', 'capped_rate')){ sub.ctdf$regvar = sub.ctdf[[reg.var]] ctlong = gather(sub.ctdf[,c('projid', 'sample', 'regvar', 'platform', split.vars)], var, level, -sample, -regvar, -platform, -projid) if (reg.var %in% c('rate','capped_rate')){ ctlong$regvar = ctlong$regvar + 1e-5 } aggdf = aggregate(regvar ~ projid + platform + level + var, ctlong, mean) meandf = aggregate(regvar ~ platform + level + var, aggdf, mean) gp = ggplot(aggdf, aes(platform, regvar, fill=level)) + facet_wrap(~var, scales='free', ncol=1) + geom_boxplot(outlier.size=.5) + stat_compare_means(label='p.signif') + geom_point(data=meandf, aes(platform, regvar, fill=level), pch=24, cex=3) + labs(x='Group', y=reg.var) + scale_fill_manual(values=uqvals) + scale_color_manual(values=uqvals) + theme_pubr() + coord_flip() if (reg.var %in% c('rate','capped_rate')){ gp = gp + scale_y_log10() } pltprefix = paste0(imgpref, 'robustness_test_indlevel_boxplots_', reg.var) saveGGplot(gp, pltprefix, w=5, h=8) } # NOTE: Norm by ncounts or ngenes? (duplevel high in ALZ_repl > impacts ncounts) ggplot(sub.ctdf, aes(n_counts, count)) + geom_point() + geom_smooth(method='lm')