#!/usr/bin/R # ------------------------------------------- # Analyze whether the glial cells w/ Stage 2 gating: # 1) Have appropriate glial signatures # 2) Have the stage 2 signatures # ------------------------------------------- library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(ggplot2) library(ggpubr) library(ggbeeswarm) library(tidyr) library(scales) library(qvalue) library(gprofiler2) library(ggrepel) source(paste0(sbindir, 'CKP25/load_metadata.R')) imgdir = paste0(img, 'CKP25/') imgdir2 = paste0(imgdir, 'difftl/') imgpref = paste0(imgdir2, 'gating_') cmd = paste('mkdir -p',imgdir, imgdir2) system(cmd) # --------------------------- # Load in the RNA-level data: # --------------------------- amat = read.delim(paste0(projdir, 'all_aggregated.counts_mat.tsv'), header=F) cn = scan(paste0(projdir, 'all_aggregated.cn.tsv'), 'c', quiet=TRUE) rn = scan(paste0(projdir, 'all_aggregated.rn.tsv'), 'c', quiet=TRUE) sn = scan(paste0(projdir, 'all_aggregated.rn.symb.tsv'), 'c', quiet=TRUE) sn2 = make.unique(sn) # Subset to protein coding genes: anno = read.delim('Annotation/gencode.vM25.basic.annotation.genes.tsv', header=F) names(anno) = c('chr','db','elem','start','end','v1','strand','v2','gene','type','symbol') pcgenes = anno$gene[anno$type == 'protein_coding'] gind = which(rn %in% pcgenes) rn = rn[gind] sn2 = sn2[gind] amat = amat[gind,] amat = as.matrix(amat) rownames(amat) = sn2 colnames(amat) = cn marg = colSums(amat) # Keep only the main cells: cind = which(cn %in% meta$cell) amat = amat[,cind] cn = cn[cind] # For norm: marg = colSums(amat) norm = sweep(amat, 2, marg / 10000, '/') norm = log(norm + 1) # ----------------------- # Load in the signatures: # ----------------------- sigdir = 'CKP25/CKp25_bulk_RNAseq/signatures/' fnlist = list.files(path=sigdir, pattern='*.csv') sigdfs = list() siglist = list() enrdf = c() lcwdf = c() for (fn in fnlist){ tag = sub(".csv","",fn) tag = sub("_l2fc1.*","",tag) df = read.delim(paste0(sigdir, fn), header=T, sep=",") if (tag == 'immune_signature'){ names(df)[1] = 'gene' } df = df[order(df$padj),] write.table(df$symbol, paste0(sigdir, tag, '_msigs.tsv'), quote=F, row.names=F, col.names=F, sep="\t") df = df[df$symbol %in% rownames(amat),] siglist[[tag]] = df$symbol sigdfs[[tag]] = df[,c('symbol','log2FoldChange')] # Intersect against various definitions: cs = log(10000 * colSums(amat[df$symbol,]) / marg + 1) enrdf = rbind(enrdf, data.frame(cell=names(cs), enr=cs, tag=tag)) # Using log2FC as weights: weights = 2^df$log2FoldChange / mean(2^df$log2FoldChange) submat = sweep(amat[df$symbol,], 1, weights,'*') cs = log(10000 * colSums(submat) / marg + 1) lcwdf = rbind(lcwdf, data.frame(cell=names(cs), enr=cs, tag=tag)) } enrdf = merge(enrdf, meta) lcwdf = merge(lcwdf, meta) # ----------------------------------------------- # Plot relative enrichment of different subtypes: # ----------------------------------------------- subdf = enrdf[enrdf$label %in% c('Microglia','Oligodendrocyte', 'Stage 2', 'Excitatory'),] runcomp <- list(c("Glia", "Stage 2")) #, c("Baseline", "Stage2")) gplot = ggplot(subdf, aes(celltype, enr, color=celltype, fill=celltype)) + facet_grid(label~tag) + # scale_color_manual(values=sublabel.col) + # scale_fill_manual(values=sublabel.col) + geom_violin(fill=NA) + geom_quasirandom(varwidth=TRUE, size=.1) + labs(x=paste('Gating label'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + # stat_compare_means(method='t.test', label='p.format') theme(axis.text.x = element_text(angle=90, hjust=1, vjust=.5)) + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_expressionlevels_violin_gating.png'), gplot, dpi=450, units='in', width=8, height=8) ggsave(paste0(imgpref, 'signatures_expressionlevels_violin_gating.pdf'), gplot, dpi=450, units='in', width=8, height=8) runcomp <- list(c("Excitatory", "Stage 2"), c("Oligodendrocyte", "Excitatory"), c("Oligodendrocyte", "Stage 2"), c("Microglia", "Stage 2")) gplot = ggplot(subdf, aes(label, enr, color=label, fill=label)) + facet_grid(~tag) + scale_color_manual(values=label.col) + scale_fill_manual(values=label.col) + geom_violin(fill=NA) + geom_quasirandom(varwidth=TRUE, size=.1) + labs(x=paste('Single-cell cluster'), y='Norm. signature expression, log(X+1)') + theme_pubr() + theme(legend.position='none') + # stat_compare_means(method='t.test', label='p.format') theme(axis.text.x = element_text(angle=90, hjust=1, vjust=.5)) + stat_compare_means(comparisons=runcomp, label='p.signif', method='t.test', bracket.size=.2, tip.length=0.01, vjust=.5, step.increase=0.06) ggsave(paste0(imgpref, 'signatures_expressionlevels_violin_gating_flip.png'), gplot, dpi=450, units='in', width=6.5, height=5.5) ggsave(paste0(imgpref, 'signatures_expressionlevels_violin_gating_flip.pdf'), gplot, dpi=450, units='in', width=6.5, height=5.5) # Is there an individual-level artifact responsible for this mis-clustering? aggmeta = aggregate(cell ~ celltype + sublabel, meta, length) ggplot(aggmeta, aes(x=celltype, y=sublabel)) + geom_tile(aes(fill=cell)) + geom_text(aes(label=cell), color = "black", fontface = "bold", size = 6) + scale_x_discrete(expand = c(0,0)) + scale_y_discrete(expand = c(0,0)) + scale_fill_gradient("Legend \n", low = "lightblue", high = "blue") + theme_bw() aggmeta = aggregate(cell ~ celltype + short.sublabel + mouse_id, meta[meta$sublabel != 'Batch',], length) aggmeta$celltype = factor(aggmeta$celltype, levels = c('Glia','Baseline','Stage 1','Stage 2')) nn = c('In0','In1','Ex3','Ex2','Ex1','Ex0') expdf = data.frame(celltype=c('Stage 2','Glia','Glia','Glia', rep('Baseline', length(nn)), rep('Stage 1', length(nn))), short.sublabel=c('St.2','OPC','Mic.','Oli.', rep(nn,2))) expdf$score = 1 gplot = ggplot(aggmeta, aes(x=celltype, y=short.sublabel)) + facet_wrap(~mouse_id, nrow=2) + geom_tile(aes(fill=cell)) + geom_text(aes(label=cell), color = "black", fontface = "bold", size = 3) + geom_tile(data=expdf, aes(x=celltype, y=short.sublabel), fill=NA, color='red', lwd=.25) + scale_x_discrete(expand = c(0,0)) + scale_y_discrete(expand = c(0,0)) + scale_fill_gradient("Legend \n", low = "lightblue", high = "blue") + labs(x='Gating label', y='Single-cell cluster') + theme_bw() + theme(legend.position='none') + theme(axis.text.x = element_text(angle=90, hjust=1, vjust=.5)) ggsave(paste0(imgpref, 'gating_cluster_contingency.png'), gplot, dpi=450, units='in', width=6.5, height=5) ggsave(paste0(imgpref, 'gating_cluster_contingency.pdf'), gplot, dpi=450, units='in', width=6.5, height=5)