#!/usr/bin/R # ------------------------------------------------ # Estimate effect sizes for gene candidate fusions # --follows P301S, updated for human # Updated: 01/11/23 # ------------------------------------------------ library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(MASS) library(ggplot2) library(ggpubr) source(paste0(bindir, 'mosaicism/fusions_AD430/load_metadata.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Directories: plotdir = paste0(imgdir, 'fusions/') imgpref = paste0(plotdir, 'AD430_fusions_') cmd = paste('mkdir -p', plotdir) system(cmd) # Load in the gene fusion data: # ----------------------------- source(paste0(bindir, 'mosaicism/fusions_AD430/load_fusioncalls.R')) source(paste0(bindir, 'mosaicism/genefusions/auxiliary_fusion_plotting.R')) sub.ctdf = sub.ctdf[grep("^SM_171013", sub.ctdf$dataset, invert=TRUE),] # Remove batch (v2) fulldf = c() reg.var = 'count' # for (reg.var in reg.vars){ for (only.exc in c(TRUE, FALSE)){ cat(reg.var, '\t', only.exc, '\n') df = sub.ctdf if (only.exc){ df = df[df$celltype == 'Exc',] } resdf = c() for (var in test.vars){ cat('--', var, '\n') form.vec = c(reg.var, ' ~ offset(log(n_counts)) + ', var) form.vec = c(form.vec, count.covar, ext.covar) if (!only.exc){ form.vec = c(form.vec, ' + celltype') } form = asform(form.vec) # Fit model + process fit: if (reg.var %in% c('capped_count','count')){ fit = glm.nb(form, df) } else if (reg.var %in% c('has_fusion')){ fit = glm(form, df, family='binomial') } else { fit = glm(form, df, family='gaussian') } cfit = processLmFit(fit) cfit$var = var resdf = rbind(resdf, cfit[2,]) } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) resdf = resdf[order(resdf$Est),] resdf$coef = resdf$coef resdf$coef = factor(resdf$coef, levels=resdf$coef) resdf$set = ifelse(only.exc, 'Exc. neurons', 'All cells') resdf$reg.var = reg.var fulldf = rbind(fulldf, resdf) } # Plot all of these effects jointly: # ---------------------------------- labdf = fulldf[fulldf$padj < 0.05,] labdf$lab = paste0('p=', sprintf('%0.1e', labdf$padj)) ypos = 0.2 gp = ggplot(fulldf, aes(coef, Est, ymin=Est-2*SE, ymax=Est+2*SE, label=p, color=factor(col))) + facet_grid(. ~ set) + geom_point() + geom_errorbar(width=.2) + geom_hline(yintercept=0, lty='dotted') + geom_text(data=labdf, aes(coef, y=ypos, label=lab), color='black') + scale_color_manual(values=devals, name='Significance:') + theme_pubr() + coord_flip() + labs(x='Condition', y='Estimated effect size') regpref = paste0(imgpref, 'reg_effectsizes_results_') saveGGplot(gp, paste0(regpref, 'errorbar'), w=6, h=2.5) # Repeat, separating out batches: # ------------------------------- sub.ctdf$batch = sub("-.*","", sub.ctdf$dataset) batchlist = unique(sub.ctdf$batch) fulldf = c() reg.var = 'count' for (batch in batchlist){ for (only.exc in c(TRUE, FALSE)){ cat(batch, '\t', only.exc, '\n') df = sub.ctdf[sub.ctdf$batch == batch,] if (only.exc){ df = df[df$celltype == 'Exc',] } resdf = c() for (var in test.vars){ cat('--', var, '\n') form.vec = c(reg.var, ' ~ offset(log(n_counts)) +', var) form.vec = c(form.vec, count.covar, ext.covar) if (!only.exc){ form.vec = c(form.vec, ' + celltype') } form = asform(form.vec) if (reg.var %in% c('capped_count','count')){ fit = glm.nb(form, df) } else if (reg.var %in% c('has_fusion')){ fit = glm(form, df, family='binomial') } else { fit = glm(form, df, family='gaussian') } cfit = processLmFit(fit) cfit$var = var resdf = rbind(resdf, cfit[2,]) } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) resdf = resdf[order(resdf$Est),] resdf$coef = resdf$coef resdf$coef = factor(resdf$coef, levels=resdf$coef) resdf$set = ifelse(only.exc, 'Exc. neurons', 'All cells') resdf$reg.var = reg.var resdf$batch = batch fulldf = rbind(fulldf, resdf) } } # Plot all of these effects jointly: # ---------------------------------- fulldf = fulldf[fulldf$batch != 'SM_Last16',] labdf = fulldf[fulldf$padj < 0.05,] labdf$lab = paste0('p=', sprintf('%0.1e', labdf$padj)) gp = ggplot(fulldf, aes(coef, Est, ymin=Est-2*SE, ymax=Est+2*SE, label=p, color=factor(col))) + facet_grid(batch ~ set) + geom_point() + geom_errorbar(width=.2) + geom_hline(yintercept=0, lty='dotted') + geom_text(data=labdf, aes(coef, y=0.4, label=lab), color='black') + scale_color_manual(values=devals, name='Significance:') + theme_pubr() + coord_flip() + labs(x='Condition', y='Estimated effect size') regpref = paste0(imgpref, 'reg_effectsizes_results_batches_') saveGGplot(gp, paste0(regpref, 'errorbar'), w=10, h=12) # NB-LMM with Nebula because everything else is not runnable: # ----------------------------------------------------------- library(nebula) # ll = makeRegFormula(pathdf, path=path, nruv=10) fulldf = c() reg.var = 'count' for (only.exc in c(TRUE, FALSE)){ cat(reg.var, '\t', only.exc, '\n') df = sub.ctdf if (only.exc){ df = df[df$celltype == 'Exc',] } resdf = c() rownames(df) = NULL # Remove NA values: df = df[!is.na(df$pmi_rescaled),] df = df[!is.na(df$age_rescaled),] # Counts to model: submat = t(as.matrix(df[,'count', drop=F])) offset = log(df$n_counts) resdf = c() for (var in test.vars){ cat('--', var, '\n') form.vec = c(reg.var, ' ~ offset(log(n_counts)) + ', var) form.vec = c(form.vec, count.covar, ext.covar) if (!only.exc){ form.vec = c(form.vec, ' + celltype') } form = asform(form.vec) mdx = model.matrix(form, data=df) # Fit model + process fit: re = nebula(submat, as.character(df$projid), pred=mdx, offset=offset, model='NBLMM') rdf = re$summary cn = colnames(rdf) rdf = rdf[,cn[grep(var, cn)]] rdf$coef = sub('[a-zA-Z]*_', '', names(rdf)[1]) names(rdf) = sub('_.*', '', names(rdf)) resdf = rbind(resdf, rdf) } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) resdf = resdf[order(resdf$logFC),] resdf$coef = varmap[resdf$coef] resdf$coef = factor(resdf$coef, levels=resdf$coef) resdf$set = ifelse(only.exc, 'Exc. neurons', 'All cells') resdf$reg.var = reg.var fulldf = rbind(fulldf, resdf) } # Plot all of these effects jointly: # ---------------------------------- labdf = fulldf # [fulldf$padj < 0.05,] labdf$lab = paste0('p=', sprintf('%0.1e', labdf$padj)) ypos = 0.2 gp = ggplot(fulldf, aes(coef, logFC, ymin=logFC-2*se, ymax=logFC+2*se, label=p, color=factor(col))) + facet_grid(. ~ set) + geom_point() + geom_errorbar(width=.2) + geom_hline(yintercept=0, lty='dotted') + geom_text(data=labdf, aes(coef, y=ypos, label=lab), color='black') + scale_color_manual(values=devals, name='Significance:') + theme_pubr() + coord_flip() + labs(x='Condition', y='Estimated effect size') regpref = paste0(imgpref, 'reg_effectsizes_NEBULA_results_') saveGGplot(gp, paste0(regpref, 'errorbar'), w=8, h=3)