#!/usr/bin/R # -------------------------------------------------- # Test if histone genes are differential by AD vars: # Updated: 09/06/22 # -------------------------------------------------- library(cbrbase) set_proj('DEVTRAJ') # Libraries: library(tidyr) library(ggplot2) library(ggpubr) library(ggrepel) library(ggbeeswarm) library(scales) library(RColorBrewer) library(Matrix) options(width=170) # Load in the two cohorts of candidate gene fusion counts: # -------------------------------------------------------- gbdir = paste0(bindir, 'mosaicism/genefusions/') source(paste0(gbdir, 'load_fusion_data.R')) source(paste0(gbdir, 'auxiliary_fusion_plotting.R')) source(paste0(gbdir, 'load_go_signatures.R')) source(paste0(gbdir, 'load_joint_expression.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Make directories for analysis: # ------------------------------ analysis = 'fusion' project = 'ALZ' imgdir = paste0(img, project, '/', analysis , '/') imgpref = paste0(imgdir, analysis, "_mspred_strat_", project, '_') cmd = paste0('mkdir -p ', imgdir) system(cmd) # Preprocess data: # ---------------- make.map = function(df){ map = df[,1] names(map) = df[,2] return(map) } marg = colSums(fullmat) margi = colSums(fullmati) meta$braaksc = as.numeric(as.character(meta$braaksc)) meta$bd = ifelse(meta$braaksc >= 5, 'AD', 'CTRL') meta$bd = factor(meta$bd, levels=c('CTRL','AD')) meta = merge(meta, ext.meta[,c('projid','niareagansc')]) meta$nrad = ifelse(meta$niareagansc <= 2, 'AD', 'CTRL') meta$nrad = factor(meta$nrad, levels=c('CTRL','AD')) # Test differential histone genes: # -------------------------------- kwd = 'nucleosome' subtermdf = termdf[grep(kwd, termdf$GOTERM),] submap = gomap[gomap$GOID %in% subtermdf$GOID,] genes = sort(unique(submap$symbol)) # submap = gomap[gomap$symbol == 'H2AFJ',] genes = c('H2AFJ') genes = anno$symbol[grep("^H2A", anno$symbol)] genes = c(genes, anno$symbol[grep("^HIST", anno$symbol)]) ensgs = anno$ENSG[anno$symbol %in% genes] ensgs = ensgs[ensgs %in% rownames(fullmat)] ngenes = length(ensgs) cs = log1p(10000.0 * colSums(fullmat[ensgs,]) / marg) norm = as.matrix(log1p(1e4 * fullmat[ensgs,] / marg)) # norm = as.matrix(log1p(1e4 * fullmati[ensgs,] / margi)) ndf = as.data.frame(norm) ndf$ENSG = rownames(ndf) ndf = gather(ndf, sample, value, -ENSG) # Add genes + AD status: ndf = merge(ndf, anno[,c('symbol','ENSG','chr')]) ndf = merge(ndf, meta[,c('sample','nrad','relabel', 'cohort', 'bd')]) subdf = ndf[(ndf$relabel == 'Excitatory') & (ndf$cohort == 'ALZ'),] # Are signatures different in original cohort (NRAD): advals = c('AD'='indianred', 'CTRL'='royalblue') gp = ggplot(subdf, aes(symbol, value, fill=nrad)) + scale_fill_manual(values=advals, name='NIA-Reagan score') + geom_boxplot(outlier.size=0.5) + stat_compare_means(label='p.signif') + labs(x=paste('Gene'), y='Norm. expression, log(X+1)') + theme_pubr() + coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5)) pltpref = paste0(imgpref, 'histonegenes_expr_celltype_exc_boxplot_nrad') saveGGplot(gp, pltpref, w=6, h=15) # Are signatures different in original cohort (BRAAK) gp = ggplot(subdf, aes(symbol, value, fill=bd)) + scale_fill_manual(values=advals, name='Late Braak Stage:') + geom_boxplot(outlier.size=0.5) + stat_compare_means(label='p.signif') + labs(x=paste('Gene'), y='Norm. expression, log(X+1)') + theme_pubr() + coord_flip() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=.5)) pltpref = paste0(imgpref, 'histonegenes_expr_celltype_exc_boxplot_bd') saveGGplot(gp, pltpref, w=6, h=15) # Regressions, plot DE coefficients: # ---------------------------------- subdf = ndf[ndf$cohort == 'ALZ',] subdf = subdf[!is.na(subdf$value),] subdf = subdf[!is.infinite(subdf$value),] celltypes = unique(subdf$relabel) genes = unique(subdf$symbol) fulldf = c() for (celltype in celltypes) { celldf = subdf[subdf$relabel == celltype,] resdf = c() for (gene in genes){ df = celldf[celldf$symbol == gene,] # Regression or wilcoxon (poiss?) form = as.formula('value ~ nrad') fit = glm(form, df, family='gaussian') cfit = processLmFit(fit) cfit$symbol = gene cfit$relabel = celltype resdf = rbind(resdf, cfit[grep('nrad', cfit$coef),]) } resdf = resdf[order(resdf$p),] resdf$padj = p.adjust(resdf$p, method='fdr') resdf$col = with(resdf, ifelse(padj < 0.05, ifelse(Est < 0, 'Down', 'Up'), 'NS')) fulldf = rbind(fulldf, resdf) } # Plot as a heatmap: # ------------------ # Save as heatmap: fulldf = merge(fulldf, unique(anno[,c('symbol','chr')])) subdf = fulldf[fulldf$chr == '6',] cmat = pivot.tomatrix(fulldf[,c('Est','symbol','relabel')], 'symbol','Est') pmat = pivot.tomatrix(fulldf[,c('padj','symbol','relabel')], 'symbol','padj') pmat[is.na(pmat)] = 1 mx = 0.25 mat.col_fun = colorRamp2(c(-mx, 0, mx), c("blue", "white", "red")) ux = 1.5 ht = Heatmap(cmat, name='Est', col=mat.col_fun, width = ncol(cmat)*unit(ux, "mm"), height = nrow(cmat)*unit(ux, "mm"), border_gp=gpar(color='black', lwd=.5), column_split = ifelse(1:ncol(cmat) %in% grep('^H2A', colnames(cmat)), 'H2A', 'HIST'), cell_fun = function(j, i, x, y, w, h, col){ if (pmat[i,j] < 0.05){ grid.text('*', x, y, gp=gpar(fontsize=gridtxt.fs)) } } ) h = 2 + 1 / 15 * nrow(cmat) w = 2 + 1 / 15 * ncol(cmat) pltpref = paste0(imgpref, 'histonegenes_expr_celltype_heatmap') saveHeatmap(ht, pltpref, h=h, w=w)