#!/usr/bin/R # ------------------------------------------------ # Plots on aggregate statistics for fusions in 10x # --follows P301S, updated for human # Updated: 01/11/23 # ------------------------------------------------ library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(MASS) library(ggplot2) library(ggpubr) source(paste0(bindir, 'mosaicism/fusions_AD430/load_metadata.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Directories: plotdir = paste0(imgdir, 'fusions/') imgpref = paste0(plotdir, 'AD430_fusions_') cmd = paste('mkdir -p', plotdir) system(cmd) # Load in the gene fusion data: # ----------------------------- source(paste0(bindir, 'mosaicism/fusions_AD430/load_fusioncalls.R')) source(paste0(bindir, 'mosaicism/genefusions/auxiliary_fusion_plotting.R')) # Aggregate rates for excitatory neurons or for full dataset: # ----------------------------------------------------------- fulldf = c() for (only.exc in c(TRUE, FALSE)){ if (only.exc){ df = ctdf[ctdf$celltype == 'Exc',] } else { df = ctdf} df$ncell = 1 df$n_counts[is.na(df$n_counts)] = 1 aggdf = aggregate(cbind(count, n_counts, n_genes, ncell) ~ dataset + projid + bd + nrad, df, sum) aggdf$rate = with(aggdf, count / n_counts) aggdf$rate.cell = with(aggdf, count / ncell) aggdf = aggdf[order(aggdf$rate),] aggdf$only.exc = only.exc fulldf = rbind(fulldf, aggdf) } # fulldf = fulldf[fulldf$count != 0,] # unprocessed - bc or alignment issues fulldf = fulldf[grep("^SM_171013", fulldf$dataset, invert=TRUE),] # Low quality samples, must remove # Add the ordinal diagnosis values: # --------------------------------- ord.vars = c('braaksc','niareagansc', 'cogdx', 'ceradsc') fulldf = merge(fulldf, unique(metadata[,c('projid', ord.vars, 'msex','age_rescaled','pmi_rescaled')])) colvals[['bd']] = colvals$nrad # Regression: df = fulldf[fulldf$only.exc == TRUE,] var = 'bd' form.vec = c('count ~ offset(log(n_counts)) + ', var) form.vec = c(form.vec, '+ log(n_genes) + msex + pmi_rescaled + age_rescaled') form = asform(form.vec) fit = glm.nb(form, df) # Plot boxplots for ordinal + binary diagnoses: # --------------------------------------------- for (var in c('bd','nrad', ord.vars)){ if (class(fulldf[[var]]) != 'factor'){ lvls = sort(unique(fulldf[[var]])) fulldf[[var]] = factor(fulldf[[var]], levels=lvls) } else { lvls = levels(fulldf[[var]]) } if (var %in% names(colvals)){ cols = colvals[[var]] } else { cols = pcols[1:length(lvls)] names(cols) = lvls } gp = ggplot(fulldf, aes_string('only.exc', 'rate', fill=var)) + scale_fill_manual(values=cols) + geom_boxplot() + labs(x='Only excitatory neurons?', y='Rate (counts / # reads (PFC) or # cells (HC))') + stat_compare_means(label='p.format') + theme_pubr() pltprefix = paste0(imgpref, 'boxplot_rate_', var) saveGGplot(gp, pltprefix, w=5, h=5) } # Plot the scatter against plaque, nft: # ------------------------------------- fulldf = merge(fulldf, aggregate(cbind(nft_mf, plaq_n_mf, gpath) ~ projid + braaksc, metadata, mean)) gp = ggplot(fulldf, aes(plaq_n_mf, rate, color=bd)) + scale_color_manual(values=colvals$nrad, name='Late-AD?') + geom_point(cex=.75) + stat_cor(color='black') + geom_smooth(method='lm', color='black') + theme_pubr() pltprefix = paste0(imgpref, 'scatter_plaq_n_mf_bd_rate') saveGGplot(gp, pltprefix, w=5, h=5) gp = ggplot(fulldf, aes(nft_mf, rate, color=bd)) + scale_color_manual(values=colvals$nrad, name='Late-AD?') + geom_point(cex=.75) + stat_cor(color='black') + geom_smooth(method='lm', color='black') + theme_pubr() pltprefix = paste0(imgpref, 'scatter_nft_mf_bd_rate') saveGGplot(gp, pltprefix, w=5, h=5) # Perform an unbiased search for top covariates vs. pseudobulk rate: # ------------------------------------------------------------------ df = fulldf[fulldf$only.exc == FALSE,] df = merge(df, metadata) vars = names(metadata) rm.vars = c('projid', 'scaled_to') vars = vars[!(vars %in% rm.vars)] fitdf = c() for (var in vars){ print(var) # 3 versions of regr: # Rate models # form = asform(c('cbind(count, n_counts) ~ pmi_rescaled + age_rescaled + study + msex + ', var)) # fit = try(glm(form, df, family='binomial')) # No assumptions: # form = asform(c('rate ~ pmi_rescaled + age_rescaled + study + msex + ', var)) # fit = try(glm(form, df, family='gaussian')) # Count models (but should do these at the cell level) form = asform(c('count ~ offset(log(n_counts)) + log(n_genes) + pmi_rescaled + age_rescaled + study + msex + ', var)) # fit = try(glm(form, df, family='poisson')) fit = try(glm.nb(form, df)) if (class(fit)[1] != 'try-error'){ cfit = processLmFit(fit) names(cfit)[3] = 'z' cfit = cfit[cfit$coef != '(Intercept)', , drop=F] cfit$var = var fitdf = rbind(fitdf, cfit) } } fitdf = fitdf[order(fitdf$p),] head(fitdf, 25) rmvar = c('log(n_genes)', 'studyROS ', 'pmi_rescaled') head(fitdf[!(fitdf$coef %in% rmvar),], 25) gp = ggplot(df, aes(only.exc, rate, fill=factor(bd))) + facet_wrap(~study) + geom_boxplot() + labs(x='Only excitatory neurons?', y='Rate (counts / # reads (PFC) or # cells (HC))') + stat_compare_means(label='p.format') + theme_pubr() gp = ggplot(df, aes(nft_ec, rate, color=factor(bd))) + facet_wrap(~ study) + scale_color_manual(values=colvals$nrad, name='Late-AD?') + geom_point(cex=.75) + stat_cor(color='black') + geom_smooth(method='lm', color='black') + labs(y='Rate (counts / # reads (PFC) or # cells (HC))') + theme_pubr() gp = ggplot(df, aes(cognep_random_slope, rate, color=factor(bd))) + facet_wrap(~ study) + scale_color_manual(values=colvals$nrad, name='Late-AD?') + geom_point(cex=.75) + stat_cor(color='black') + geom_smooth(method='lm', color='black') + labs(y='Rate (counts / # reads (PFC) or # cells (HC))') + stat_compare_means(label='p.format') + theme_pubr() # pltprefix = paste0(imgpref, 'boxplot_rate_', var) # saveGGplot(gp, pltprefix, w=5, h=5)