#!/usr/bin/R # ------------------------------------------- # Plot inflamm sigs for the CKP25 mouse data: # ------------------------------------------- library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(ggplot2) library(ggpubr) library(ggbeeswarm) library(tidyr) library(scales) library(qvalue) library(gprofiler2) library(ggrepel) source(paste0(sbindir, 'CKP25/load_metadata.R')) imgdir = paste0(img, 'CKP25/') imgdir2 = paste0(imgdir, 'difftl/') imgpref = paste0(imgdir2, 'sigs_') cmd = paste('mkdir -p',imgdir, imgdir2) system(cmd) # --------------------------- # Load in the RNA-level data: # --------------------------- amat = read.delim(paste0(projdir, 'all_aggregated.counts_mat.tsv'), header=F) cn = scan(paste0(projdir, 'all_aggregated.cn.tsv'), 'c', quiet=TRUE) rn = scan(paste0(projdir, 'all_aggregated.rn.tsv'), 'c', quiet=TRUE) sn = scan(paste0(projdir, 'all_aggregated.rn.symb.tsv'), 'c', quiet=TRUE) sn2 = make.unique(sn) # Subset to protein coding genes: anno = read.delim('Annotation/gencode.vM25.basic.annotation.genes.tsv', header=F) names(anno) = c('chr','db','elem','start','end','v1','strand','v2','gene','type','symbol') pcgenes = anno$gene[anno$type == 'protein_coding'] gind = which(rn %in% pcgenes) rn = rn[gind] sn2 = sn2[gind] amat = amat[gind,] amat = as.matrix(amat) rownames(amat) = sn2 colnames(amat) = cn marg = colSums(amat) # Keep only the main cells: cind = which(cn %in% meta$cell) amat = amat[,cind] cn = cn[cind] # For norm: marg = colSums(amat) norm = sweep(amat, 2, marg / 10000, '/') norm = log(norm + 1) # ----------------------- # Load in the signatures: # ----------------------- sigdir = 'CKP25/CKp25_bulk_RNAseq/signatures/' fnlist = list.files(path=sigdir, pattern='*.csv') sigdfs = list() siglist = list() enrdf = c() lcwdf = c() for (fn in fnlist){ tag = sub(".csv","",fn) tag = sub("_l2fc1.*","",tag) df = read.delim(paste0(sigdir, fn), header=T, sep=",") if (tag == 'immune_signature'){ names(df)[1] = 'gene' } df = df[order(df$padj),] write.table(df$symbol, paste0(sigdir, tag, '_msigs.tsv'), quote=F, row.names=F, col.names=F, sep="\t") df = df[df$symbol %in% rownames(amat),] siglist[[tag]] = df$symbol sigdfs[[tag]] = df[,c('symbol','log2FoldChange')] # Intersect against various definitions: cs = log(10000 * colSums(amat[df$symbol,]) / marg + 1) enrdf = rbind(enrdf, data.frame(cell=names(cs), enr=cs, tag=tag)) # Using log2FC as weights: weights = 2^df$log2FoldChange / mean(2^df$log2FoldChange) submat = sweep(amat[df$symbol,], 1, weights,'*') cs = log(10000 * colSums(submat) / marg + 1) lcwdf = rbind(lcwdf, data.frame(cell=names(cs), enr=cs, tag=tag)) } meta = meta[meta$sublabel2 != 'Deid',] meta = meta[meta$label != 'Batch',] enrdf = merge(enrdf, meta) lcwdf = merge(lcwdf, meta) # ----------------------------------------------- # Plot relative enrichment of different subtypes: # ----------------------------------------------- subdf = enrdf[enrdf$label %in% c('Excitatory','Inhibitory','Stage 2'),] runcomp <- list(c("Ex0", "Ex2"), c("Ex0", "Ex3"), c("Ex0", "St.2"), c('Ex2', 'Ex3'), c('Ex2', 'St.2'), c('Ex3', 'St.2')) gplot = ggplot(subdf, aes(short.sublabel, enr, color=sublabel, fill=sublabel)) + # facet_wrap(~tag, nrow=3, scale='free_y') + facet_wrap(~tag, nrow=1, scale='free_y') + scale_color_manual(values=sublabel.col) + scale_fill_manual(values=sublabel.col) + geom_violin(fill=NA) + geom_quasirandom(varwidth=TRUE, size=.3) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + # stat_compare_means(method='t.test', label='p.format') stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_expressionlevels_violin.png'), gplot, dpi=450, units='in', width=9, height=4) ggsave(paste0(imgpref, 'signatures_expressionlevels_violin.pdf'), gplot, dpi=450, units='in', width=9, height=4) # Weighted subdf = lcwdf[lcwdf$label %in% c('Excitatory','Inhibitory','Stage 2'),] gplot = ggplot(subdf, aes(short.sublabel, enr, color=sublabel, fill=sublabel)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_color_manual(values=sublabel.col) + scale_fill_manual(values=sublabel.col) + geom_violin(fill=NA) + geom_quasirandom(varwidth=TRUE, size=.3) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_weighted_expressionlevels_violin.png'), gplot, dpi=450, units='in', width=9, height=4) ggsave(paste0(imgpref, 'signatures_weighted_expressionlevels_violin.pdf'), gplot, dpi=450, units='in', width=9, height=4) # Merge at the individual level and plot individual-level boxplots: # ----------------------------------------------------------------- subdf = enrdf[enrdf$label %in% c('Excitatory','Inhibitory','Stage 2'),] aggdf = aggregate(enr ~ mouse_id + sublabel + label + short.sublabel + tag + genotype, subdf, mean) aggdf$short.sublabel = factor(aggdf$short.sublabel, levels=c('In0','In1','Ex0','Ex1','Ex2','Ex3','St.2')) gplot = ggplot(aggdf, aes(short.sublabel, enr, fill=sublabel)) + # facet_wrap(~tag, nrow=3, scale='free_y') + facet_wrap(~tag, nrow=1, scale='free_y') + # scale_color_manual(values=sublabel.col) + scale_fill_manual(values=sublabel.col) + geom_boxplot(outlier.shape=NA, alpha=1, lwd=.5) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + # stat_compare_means(method='t.test', label='p.format') stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_boxplot.png'), gplot, dpi=450, units='in', width=9, height=4) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_boxplot.pdf'), gplot, dpi=450, units='in', width=9, height=4) gplot = ggplot(aggdf[aggdf$genotype == 'CKp25',], aes(short.sublabel, enr, fill=sublabel)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_fill_manual(values=sublabel.col) + geom_boxplot(outlier.shape=NA, alpha=1) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.35) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + # stat_compare_means(method='t.test', label='p.format') stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_CKp25_boxplot.png'), gplot, dpi=450, units='in', width=7.5, height=4) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_CKp25_boxplot.pdf'), gplot, dpi=450, units='in', width=7.5, height=4) g2 = gplot + coord_flip() + facet_wrap(~tag, nrow=3, scale='free_x') ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_CKp25_boxplot_vert.png'), g2, dpi=450, units='in', width=4, height=5) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_CKp25_boxplot_vert.pdf'), g2, dpi=450, units='in', width=4, height=5) subdf = aggdf[aggdf$tag %in% c('Stage1_vs_Baseline', 'Stage2_vs_Baseline') & aggdf$sublabel %in% c('Ex0','Stage 2'),] gplot = ggplot(subdf, aes(short.sublabel, enr, fill=sublabel, color=genotype)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_color_manual(values=c('grey50','black'), name='') + scale_fill_manual(values=sublabel.col, name='') + geom_boxplot(outlier.shape=NA, alpha=1) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + stat_compare_means(label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_geno_boxplot.png'), gplot, dpi=450, units='in', width=5, height=5) ggsave(paste0(imgpref, 'signatures_expressionlevels_indiv_geno_boxplot.pdf'), gplot, dpi=450, units='in', width=5, height=5) # -------------------------------------------------------------- # Differential analysis between the Stage 2 mice in CK vs. CKp25 # -------------------------------------------------------------- ct = 'Stage 2' ocells = meta$cell[meta$sublabel %in% ct] cells1 = meta$cell[meta$sublabel %in% ct & meta$genotype == 'CK'] cells2 = meta$cell[meta$sublabel %in% ct & meta$genotype == 'CKp25'] smat = amat[,ocells] # Remove if mean == 0: meanss = apply(smat,1, mean) smat = smat[meanss != 0,] print(dim(smat)) # Testable genes: genes = rownames(smat) test.genes = genes test.genes = test.genes[grep("^AC[0-9]*\\.[0-9]*$", test.genes, invert=TRUE)] # Pseudo/unlabeled NTEST = length(test.genes) # Normalize and log1p transform: smarg = apply(smat, 2, sum) nmat = sweep(smat, 2, median(smarg) / smarg, '*') nmat = log(nmat + 1) pvals = rep(1, NTEST) xmean = rep(0, NTEST) ymean = rep(0, NTEST) for (i in 1:NTEST){ gene = test.genes[i] x = nmat[gene, cells1] y = nmat[gene, cells2] xmean[i] = mean(x) ymean[i] = mean(y) pvals[i] = wilcox.test(x, y, alternative='two.sided')$p.value } pvdf = data.frame(sublabel=ct, geno1='CK', geno2='CKp25', testtype='genotype', symbol=test.genes, mean.in=xmean, mean.out=ymean, pvalue=pvals) pvdf$p.adj = p.adjust(pvals) pvdf$qvalue = qvalue(pvals)$qvalues pvdf$pvalue[is.na(pvdf$pvalue)] = 1 pvdf$p.adj[is.na(pvdf$p.adj)] = 1 pvdf = pvdf[order(pvdf$pvalue), ] # ------------- # Plot volcano: # ------------- pvdf$log2FC = log2(pvdf$mean.out / pvdf$mean.in) pvdf$col = (pvdf$qvalue < 0.05) * (1 + 1 * (pvdf$log2FC > 0)) gplot = ggplot(pvdf, aes(log2FC, -log10(pvalue), color=factor(col))) + scale_color_manual(values=c('grey75','royalblue','indianred')) + geom_point() + labs(x=paste('log2FC (CKp25 / CK)'), y='-log10(p-value)') + geom_text_repel(data=pvdf[pvdf$col != 0,], aes(label=symbol)) + scale_y_continuous(expand=c(0,0)) + geom_vline(xintercept=0, lty='dashed') + theme_pubr() + theme(legend.position='none') ggsave(paste0(imgpref, 'difftl_wilcoxon_stage2_genotype_volcano.png'), gplot, dpi=450, units='in', width=4.5, height=5) # Compare the go enrichments: ckgene = head(pvdf$symbol[pvdf$mean.in > pvdf$mean.out],50) p25gene = head(pvdf$symbol[pvdf$mean.in < pvdf$mean.out],50) gobj1 = gost(ckgene)$result gobj2 = gost(p25gene)$result gobj1[c(grep("GO",gobj1$source), grep("REAC",gobj1$source)), c('p_value','source','term_name')] gobj2[c(grep("GO",gobj2$source), grep("REAC",gobj2$source)), c('p_value','source','term_name')] # ----------------------------------- # Compare to the original signatures: # ----------------------------------- # What is present in both Stage 2 sig / CK Stage 2 gnlist = siglist[['Stage2_vs_Baseline']] subdf = pvdf[pvdf$symbol %in% gnlist,] subdf = subdf[subdf$mean.in > 0.1 & subdf$mean.out > 0.1,] gobj = gost(head(subdf$symbol,50))$result gdf = gobj[c(grep("GO", gobj$source), grep("REAC",gobj$source)), c('p_value','source','term_name')] gdf = gdf[order(gdf$p_value),] # gdf[gdf$p_value < 0.0001,] head(gdf, 50) # ------------------------------------- # Repeat signature plotting for immune: # ------------------------------------- inflamm_sig = c('Ccl2','Ccl20','Cxcl10','Cxcl16','Il1a','Il6','Il15','Il18', 'Ifitm3','Ifnar1','Ifngr1','Irf7','Irf9','Jak3','Myd88', 'Nfkbia','Rela','Relb','Socs3','Tir3','Traf2', 'Psmb8','Psmb9', 'Cgas','Ddx41','Ddx58','Ifih1','Ifit1', 'Isg20','Oasl2','Pkr','Zbp1', 'Cdk5','Cdk5r1','Cdkn1a','Cdkn2a') tag = 'Inflammatory_Signature' inflamm_sig = inflamm_sig[inflamm_sig %in% rownames(amat)] cs = log(10000 * colSums(amat[inflamm_sig,]) / marg + 1) infdf = data.frame(cell=names(cs), enr=cs, tag=tag) infdf = merge(infdf, meta) subdf = infdf[infdf$label %in% c('Excitatory','Inhibitory','Stage 2'),] runcomp <- list(c("Ex0", "Ex2"), c("Ex0", "Ex3"), c("Ex0", "St.2"), c('Ex2', 'Ex3'), c('Ex2', 'St.2'), c('Ex3', 'St.2')) gplot = ggplot(subdf[subdf$genotype == 'CKp25',], aes(short.sublabel, enr, color=sublabel, fill=sublabel)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_color_manual(values=sublabel.col) + scale_fill_manual(values=sublabel.col) + geom_violin(fill=NA) + geom_quasirandom(varwidth=TRUE, size=.3) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_inflam_expressionlevels_violin.png'), gplot, dpi=450, units='in', width=4, height=4) ggsave(paste0(imgpref, 'signatures_inflam_expressionlevels_violin.pdf'), gplot, dpi=450, units='in', width=4, height=4) # Aggregated to mouse-level: aggdf = aggregate(enr ~ mouse_id + sublabel + label + short.sublabel + tag + genotype, subdf, mean) gplot = ggplot(aggdf[aggdf$genotype == 'CKp25',], aes(short.sublabel, enr, fill=sublabel)) + facet_wrap(~tag, nrow=1, scale='free_y') + scale_fill_manual(values=sublabel.col) + geom_boxplot(outlier.shape=NA, alpha=0.5) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + labs(x=paste('Sub-celltype'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + # stat_compare_means(method='t.test', label='p.format') stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_inflam_expressionlevels_indiv_CKp25_boxplot.png'), gplot, dpi=450, units='in', width=4, height=4) ggsave(paste0(imgpref, 'signatures_inflam_expressionlevels_indiv_CKp25_boxplot.pdf'), gplot, dpi=450, units='in', width=4, height=4)