#!/usr/bin/R # ------------------------------------------ # Plot basic stats for the CKP25 mouse data: # ------------------------------------------ library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(ggplot2) library(ggpubr) library(ggbeeswarm) library(tidyr) library(scales) library(RColorBrewer) source(paste0(sbindir, 'CKP25/load_metadata.R')) # Directories: imgdir = paste0(img, 'CKP25/') imgdir2 = paste0(imgdir, 'stats/') imgpref = paste0(imgdir2, 'stats_') cmd = paste('mkdir -p',imgdir, imgdir2) system(cmd) write.table(meta, file='CKP25/CKp25_QCed_cells_metadata.tsv', quote=F, row.names=F, col.names=T, sep="\t") smeta = meta[meta$label != 'Batch',] ind = sample(1:nrow(smeta), nrow(smeta), replace=FALSE) smeta = smeta[ind,] # Cell quality stats: # ------------------- g1 = ggplot(smeta, aes(sublabel2, n_genes, fill=sublabel2)) + geom_violin() + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::comma) + theme_pubr() + labs(x='scRNA sub-celltype', y='Number of genes') + theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position="none") g2 = ggplot(smeta, aes(sublabel2, n_counts, fill=sublabel2)) + geom_violin() + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::comma) + theme_pubr() + labs(x='scRNA sub-celltype', y='Number of counts') + theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position="none") garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv') ggsave(paste0(imgpref, 'qcstats_cell_sublabel2.png'), garr, width=4, height=8, units='in', dpi=450) ggsave(paste0(imgpref, 'qcstats_cell_sublabel2.pdf'), garr, width=4, height=8) # --------------------- # Plots of proportions: # --------------------- gplot = ggplot(smeta, aes(mouse_id, fill=label)) + geom_bar(position='fill') + scale_fill_manual(values=label.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_labels.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_labels.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(smeta, aes(mouse_id, fill=sublabel)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_sublabels.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_sublabels.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(smeta, aes(mouse_id, fill=sublabel2)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_sublabels2.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_sublabels2.pdf') ,gplot, width=6, height=5) submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] # Plot proportions: gplot = ggplot(submeta, aes(mouse_id, fill=sublabel)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs(x='Mouse ID', y='Percent of cells')+ theme_pubr() + theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'subnrn_proportions_sublabels.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'subnrn_proportions_sublabels.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(meta, aes(celltype, fill=sublabel)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_sublabels_celltype.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_sublabels_celltype.pdf') ,gplot, width=5, height=5) # Plot proportions: gplot = ggplot(meta, aes(celltype, fill=sublabel2)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_sublabels2_celltype.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_sublabels2_celltype.pdf') ,gplot, width=5, height=5) # Plot proportions: gplot = ggplot(meta, aes(sublabel, fill=celltype)) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Gating celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_celltype_sublabels.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype_sublabels.pdf') ,gplot, width=5, height=5) # Plot proportions: gplot = ggplot(meta, aes(sublabel, fill=celltype)) + facet_wrap(~genotype) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Gating celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_celltype_sublabels_geno.png') ,gplot, width=8, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype_sublabels_geno.pdf') ,gplot, width=8, height=5) # Plot proportions: gplot = ggplot(meta, aes(sublabel2, fill=celltype)) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Gating celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_celltype_sublabels2.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype_sublabels2.pdf') ,gplot, width=5, height=5) # Plot proportions by genotype gplot = ggplot(meta, aes(sublabel2, fill=celltype)) + facet_wrap(~genotype) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Gating celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_celltype_sublabels2_geno.png') ,gplot, width=8, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype_sublabels2_geno.pdf') ,gplot, width=8, height=5) # Plot proportions by mouse: gplot = ggplot(meta, aes(sublabel2, fill=celltype)) + facet_wrap(~mouse_id) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Gating celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_celltype_sublabels2_mouse.png') ,gplot, width=9, height=6, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype_sublabels2_mouse.pdf') ,gplot, width=9, height=6) # Plot proportions by mouse: gplot = ggplot(meta, aes(sublabel2, fill=celltype)) + facet_wrap(~mouse_id) + geom_bar(position='stack') + scale_fill_manual(values=celltype.col, name='Gating celltype:') + scale_y_continuous(expand=c(0,0)) + theme_pubr() + labs(x='scRNA celltype', y='Number of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'counts_celltype_sublabels2_mouse.png') ,gplot, width=9, height=6, units='in', dpi=450) ggsave(paste0(imgpref, 'counts_celltype_sublabels2_mouse.pdf') ,gplot, width=9, height=6) # Plot proportions: gplot = ggplot(meta, aes(mouse_id, fill=celltype)) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_celltype.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(submeta, aes(timepoint, fill=sublabel)) + facet_wrap(~genotype) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_geno_time_sublabel.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_geno_time_sublabel.pdf') ,gplot, width=5, height=5) # Plot proportions by has_p25 gplot = ggplot(meta, aes(sublabel2, fill=has_p25)) + facet_wrap(~genotype) + geom_bar(position='fill') + scale_fill_manual(values=c('grey80','slateblue'), name='p25 detected') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='scRNA celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_geno_hasp25_sublabel.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_geno_hasp25_sublabel.pdf') ,gplot, width=5, height=5) # -------------------------------------------------------- # Test: are the sub-neuronal fraction changes significant: # -------------------------------------------------------- submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] totct = agg.rename(cell ~ mouse_id + genotype, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), sublabel=unique(submeta$sublabel)) subct = aggregate(cell ~ sublabel + mouse_id, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('sublabel','short.sublabel')])) gplot = ggplot(subct, aes(short.sublabel, frac, fill=genotype)) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Genotype:') + scale_y_continuous(labels=scales::percent, lim=c(0,.85)) + theme_pubr() + labs(x='Celltype', y='Percent of neuronalcells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'subneuronal_prop_genotype.png') ,gplot, width=7, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'subneuronal_prop_genotype.pdf') ,gplot, width=7, height=5.5) # -------------------------------------------------------------------- # Test: are the sub-neuronal fraction changes significant across time: # -------------------------------------------------------------------- submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] totct = agg.rename(cell ~ mouse_id + genotype + timepoint, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), sublabel=unique(submeta$sublabel), timepoint=unique(submeta$timepoint)) subct = aggregate(cell ~ sublabel + mouse_id + timepoint, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('sublabel','short.sublabel')])) gplot = ggplot(subct, aes(short.sublabel, frac, fill=timepoint)) + facet_wrap(~genotype) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Time-point:') + scale_y_continuous(labels=scales::percent, lim=c(0,.85)) + theme_pubr() + labs(x='Celltype', y='Percent of neuronal cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'subneuronal_prop_genotype_timepoint.png') ,gplot, width=10, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'subneuronal_prop_genotype_timepoint.pdf') ,gplot, width=10, height=5.5) # ----------------------------------------------------- # Test: is the OPC difference in fractions significant: # ----------------------------------------------------- meta$is.glial = meta$label %in% c('Oligodendrocyte','Microglia','OPC') submeta = meta[meta$is.glial,] totct = agg.rename(cell ~ mouse_id + genotype, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), label=unique(submeta$label)) subct = aggregate(cell ~ label + mouse_id, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('label','short.label')])) geno.col = c('CK' = 'lightblue','CKp25'='indianred') gplot = ggplot(subct, aes(short.label, frac, fill=genotype)) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.5) + scale_fill_manual(values=geno.col, name='Genotype:') + # scale_fill_manual(values=label.col, name='Celltype:') + scale_y_continuous(labels=scales::percent, lim=c(0,.85)) + theme_pubr() + labs(x='Celltype', y='Percent of glial cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.signif') + theme(legend.position='none') ggsave(paste0(imgpref, 'glial_prop_genotype.png') ,gplot, width=2.5, height=4, units='in', dpi=450) ggsave(paste0(imgpref, 'glial_prop_genotype.pdf') ,gplot, width=2.5, height=4) # ----------------------------------------------------- # Test: what is % of Stage 2 from "Undamaged" # ----------------------------------------------------- submeta = meta[meta$celltype == 'Undamaged',] totct = agg.rename(cell ~ mouse_id + genotype, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), label=unique(submeta$label)) subct = aggregate(cell ~ label + mouse_id, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('label','short.label')])) gplot = ggplot(subct, aes(short.label, frac, fill=genotype)) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Genotype:') + scale_y_continuous(labels=scales::percent, lim=c(0,1)) + theme_pubr() + labs(x='Celltype', y='Percent of "Undamaged" cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'stage2_prop_genotype.png') ,gplot, width=5.5, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'stage2_prop_genotype.pdf') ,gplot, width=5.5, height=5.5) # -------------------------------------------------------------------------- # Read in the transgene statistics and plot cell proportions by its recovery # -------------------------------------------------------------------------- tdf = read.delim(paste0(projdir, 'all_transgene_stats.tsv'), header=F) names(tdf) = c('run','well','gene','count') twide = spread(tdf, gene, count, fill=0) twide$cell = paste0(twide$run, '_', twide$well) tdf = merge(meta[,c('cell','genotype','n_counts','has_p25')], twide, all.x=TRUE) tdf$run = NULL tdf$well = NULL tdf[is.na(tdf)] = 0 gplot = ggplot(tdf, aes(p25 / n_counts, fill=genotype)) + scale_fill_manual(values=c('indianred','grey80'), name='Genotype') + scale_y_continuous(expand=c(0,0)) + scale_x_continuous(expand=c(0,0)) + labs(x = '# p25 reads / # reads') + theme_pubr() g1 = gplot + geom_histogram(position='fill', bins=40) + labs(y='Fraction of cells') g2 = gplot + geom_histogram(position='stack', bins=40) + labs(y='Number of cells') garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv', common.legend = TRUE, legend = "bottom") ggsave(paste0(imgpref, 'p25_rec_raw.png'), garr, width=4, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'p25_rec_raw.pdf'), garr, width=4, height=5) gplot = ggplot(tdf, aes(p25 / n_counts, fill=genotype)) + scale_fill_manual(values=c('indianred','grey80'), name='Genotype') + scale_y_continuous(expand=c(0,0)) + scale_x_log10(expand=c(0,0)) + labs(x = '# p25 reads / # reads') + theme_pubr() g1 = gplot + geom_histogram(position='fill', bins=40) + labs(y='Fraction of cells') g2 = gplot + geom_histogram(position='stack', bins=40) + labs(y='Number of cells') garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv', common.legend = TRUE, legend = "bottom") ggsave(paste0(imgpref, 'p25_rec_log10.png'), garr, width=4, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'p25_rec_log10.pdf'), garr, width=4, height=5) # -------------------------- # From the un-filtered data: # -------------------------- ometa = read.delim('Annotation/CKP25_mouse_metadata_010621.tsv', header=T, stringsAsFactors=F) ometa$cell = sub("_1_sequence.fastq","",ometa$fastq1) odf = merge(ometa, twide[,c('cell','p25')], all.x=TRUE) odf[is.na(odf)] = 0 gplot = ggplot(odf, aes(p25, fill=genotype)) + scale_fill_manual(values=c('indianred','grey80'), name='Genotype') + scale_y_continuous(expand=c(0,0)) + scale_x_continuous(expand=c(0,0)) + labs(x = '# p25 reads / # reads') + geom_histogram(position='stack', bins=50)+ theme_pubr() g1 = gplot + geom_histogram(position='fill', bins=40) + labs(y='Fraction of cells') # -------------------------------- # Plot the counts along the plate: # -------------------------------- # Estimate plate locations: tdf$loc = sapply(tdf$cell, function(x){nx=nchar(x); as.numeric(substr(x, nx-2, nx))}) tdf$plate = 1 * (tdf$loc > 96) + 1 tdf$run = paste0(sub("_.*","", tdf$cell), '_',tdf$plate) tdf$loc = tdf$loc %% 96 tdf$loc[tdf$loc == 0] = 96 nr = 12 nc = 8 tdf$col = tdf$loc %% nr tdf$col[tdf$col == 0] = nr tdf$row = (tdf$loc - tdf$col) / nr unique(tdf[,c('genotype','run')]) gplot = ggplot(tdf, aes(col, row, fill=p25)) + facet_wrap(~run) + scale_y_continuous(expand=c(0,0)) + scale_x_continuous(expand=c(0,0)) + scale_fill_distiller(palette='Blues', direction=1) + geom_tile() + labs(x='column',y='row') + theme_pubr() ggsave(paste0(imgpref, 'p25_on_plates.png'), gplot, width=8, height=8, units='in', dpi=450) ggsave(paste0(imgpref, 'p25_on_plates.pdf'), gplot, width=8, height=8)