#!/usr/bin/R # ------------------------------------------------ # Estimate effect sizes for gene candidate fusions # --for different cell types # Updated: 03/21/23 # ------------------------------------------------ library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(MASS) library(ggplot2) library(ggpubr) source(paste0(bindir, 'mosaicism/fusions_AD430/load_metadata.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Directories: plotdir = paste0(imgdir, 'fusions/') imgpref = paste0(plotdir, 'AD430_fusions_') cmd = paste('mkdir -p', plotdir) system(cmd) # Load in the gene fusion data: # ----------------------------- source(paste0(bindir, 'mosaicism/fusions_AD430/load_fusioncalls.R')) source(paste0(bindir, 'mosaicism/genefusions/auxiliary_fusion_plotting.R')) sub.ctdf = sub.ctdf[grep("^SM_171013", sub.ctdf$dataset, invert=TRUE),] # Remove batch (v2) # Regression estimates for a number of selected covariates: # --------------------------------------------------------- run_regr = function(ctdf, only.exc, use.cc=TRUE, use.ec=TRUE, reg.var='subset.count'){ resdf = c() if (only.exc){ df = ctdf[ctdf$celltype == 'Excitatory',] } else { df = ctdf } for (var in test.vars){ print(var) form.vec = c(reg.var, '~ offset(log(n_counts)) + ', var) if (use.cc){ form.vec = c(form.vec, count.covar) } if (use.ec){ form.vec = c(form.vec, ext.covar) } if (length(unique(ctdf$celltype)) > 1){ form.vec = c(form.vec, ' + celltype') } # Fit model + process fit: form = asform(c(form.vec)) fit = try(glm.nb(form, df)) if ((class(fit)[1] != 'try-error')[1]){ cfit = processLmFit(fit) cfit$var = var resdf = rbind(resdf, cfit[2,]) } } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) resdf = resdf[order(resdf$Est),] resdf$coef = varmap[resdf$coef] resdf$coef = factor(resdf$coef, levels=resdf$coef) return(resdf) } # Stratify by cell type and plot heatmap as well as intervals: # ------------------------------------------------------------ resfile = paste0(dbdir, 'fusions_AD430/', 'fusions_AD430_byct_regression_results.tsv') if (!file.exists(resfile)){ cts = unique(sub.ctdf$celltype) fulldf = c() for (ct in cts){ print(paste('--', ct, '--')) ct.ctdf = sub.ctdf[sub.ctdf$celltype == ct,] resdf = run_regr(ct.ctdf, only.exc=FALSE, reg.var='count') resdf$type = ct fulldf = rbind(fulldf, resdf) } write.table(fulldf, resfile, quote=F, sep="\t", row.names=F) } else { fulldf = read.delim(resfile, header=T) } # Plot all of these effects jointly: # ---------------------------------- labdf = fulldf[fulldf$padj < 0.05,] labdf$lab = paste0('p=', sprintf('%0.1e', labdf$padj)) ypos = 0.2 gp = ggplot(fulldf, aes(coef, Est, ymin=Est-2*SE, ymax=Est+2*SE, label=p, color=factor(col))) + facet_grid(. ~ type) + geom_point() + geom_errorbar(width=.2) + geom_hline(yintercept=0, lty='dotted') + geom_text(data=labdf, aes(coef, y=ypos, label=lab), color='black') + scale_color_manual(values=devals, name='Significance:') + theme_pubr() + coord_flip() + labs(x='Condition', y='Estimated effect size') regpref = paste0(imgpref, 'reg_effectsizes_results_byct_') saveGGplot(gp, paste0(regpref, 'errorbar'), w=8, h=4) # Plot the cell type results as a heatmap: # ----------------------------------------- cmat = pivot.tomatrix(fulldf[,c('Est','coef','type')], 'type','Est') pmat = pivot.tomatrix(fulldf[,c('padj','coef','type')], 'type','padj') mx = 0.5 mat.col_fun = colorRamp2(c(-mx, 0, mx), c("blue", "white", "red")) ux = 1.5 ht = Heatmap(cmat, name='Est', col=mat.col_fun, width = ncol(cmat)*unit(ux, "mm"), height = nrow(cmat)*unit(ux, "mm"), border_gp=gpar(color='black', lwd=.5), cell_fun = function(j, i, x, y, w, h, col){ if (pmat[i,j] < 0.05){ grid.text('*', x, y, gp=gpar(fontsize=gridtxt.fs)) } } ) h = 2 + 1 / 15 * nrow(cmat) w = 2 + 1 / 15 * ncol(cmat) pltpref = paste0(imgpref, 'reg_effectsizes_byct_heatmap') saveHeatmap(ht, pltpref, h=h, w=w)