#!/usr/bin/R # ------------------------------------------ # Plot basic stats for the CKP25 mouse data: # ------------------------------------------ library(cbrbase) set_proj('DEVTRAJ', 'mosaicism') library(ggplot2) library(ggpubr) library(ggbeeswarm) library(tidyr) library(scales) library(RColorBrewer) source(paste0(sbindir, 'CKP25/load_metadata.R')) # library(cbrbase) # set_proj('DEVTRAJ') # source(paste0(bindir, 'multiRegion/load_metadata.R')) # Building functions for regression: asform = function(x){ as.formula(paste0(x, collapse='')) } # Directories: imgdir = paste0(img, 'CKP25/') imgdir2 = paste0(imgdir, 'human_scpfc/') imgpref = paste0(imgdir2, 'human_scpfc_') cmd = paste('mkdir -p',imgdir, imgdir2) system(cmd) # ------------------------ # Load in full rna matrix: # ------------------------ load('10x_processed/filtered_matrix_attr.Rda') rna.mat = as(rna.mat, 'dgCMatrix') # ---------------- # Add in metadata: # ---------------- meta = read.delim('Annotation/metadata_PFC_all_individuals_092520.tsv') dim(rna.df) rna.df = merge(rna.df, meta[,c('projid','age_death','pmi','msex', 'nft','plaq_d','plaq_n','niareagansc','cogdx','braaksc')]) rna.df$barcode = paste0('RNA_',rna.df$TAG) rna.df$cogdxad = 'NCI' rna.df$cogdxad[rna.df$cogdx %in% c(4,5)] = 'AD' rna.df$nrad = 'CTRL' rna.df$nrad[rna.df$niareagansc %in% c(1,2)] = 'AD' rna.df$nrad = factor(rna.df$nrad, levels=c('CTRL','AD')) pqdf = unique(rna.df[, c('projid','age_death','pmi','msex', 'nft','plaq_d','plaq_n','nrad','cogdxad')]) rownames(rna.df) = rna.df$barcode # ----------------------------------- # Look at select Stage1-Stage2 cells: # ----------------------------------- gns = c('LIG1','HJURP','EXOC4','CEP192','ZGRF1','CDKN1A', 'APOE','UBB','UBC') # TODO: Norm matrix?? gmat = as.matrix(rna.mat[gns,]) marg = colSums(rna.mat) norm = sweep(gmat,2,marg / median(marg), '/') norm = log(norm + 1) crmat = cor(t(norm)) mx =0.25 crmat[crmat > mx] = mx crmat[crmat < -mx] = -mx Heatmap(crmat, zlim=c(-mx, mx)) gdf = data.frame(t(norm)) gdf$barcode = rownames(gdf) gdf = gather(gdf, gene, value, -barcode) gdf = merge(gdf, rna.df[,c('barcode','broad.cell.type','cogdxad','nrad','projid','braaksc','cogdx')]) subdf = gdf[gdf$broad.cell.type == 'Ex',] aggdf = aggregate(value ~ gene + projid + nrad + cogdxad, subdf, mean) ggplot(aggdf, aes(nrad, value, fill=nrad)) + facet_wrap(~gene, scales='free_y') + geom_boxplot() + stat_compare_means(label='p.signif') + theme_pubr() ggplot(subdf, aes(nrad, value, fill=nrad)) + facet_wrap(~gene, scales='free_y') + # geom_boxplot() + geom_violin() + stat_compare_means(label='p.signif') + theme_pubr() # label.col=c('#B22222', '#146C14', '#144F84','#5D195D','#A0522D','#F26D21','#144F84', 'darkgrey') tcols = brewer.pal(12,'Paired') j = 15 label.col = snap.cols[j:(j+7)] label.col = tcols[1:8] names(label.col) = c('Stage 2','Excitatory','Inhibitory','Microglia', 'OPC','Oligodendrocyte','Endothelial','Batch') # ------------------- # Plots of the UMAPs: # ------------------- par(mar=c(1,1,1,1)) plot(meta$Xumap1, meta$Xumap2, pch=19, col=label.col[meta$label], axes=F, ylab='', xlab='') mtext('UMAP-1',side=1, line=-.5) mtext('UMAP-2',side=2, line=-.5) j = 25 sublabel.col = c(snap.cols[j:(j+10)],'grey') # sublabel.col = c(snap.cols[j:(j+10)],'grey') names(sublabel.col) = c('Stage 2','Ex0','Ex1','Ex2','Ex3','In0', 'In1', 'Microglia', 'OPC','Oligodendrocyte','Endothelial','Batch') par(mar=c(1,1,1,1)) plot(meta$Xumap1, meta$Xumap2, pch=19, col=sublabel.col[meta$sublabel], axes=F, ylab='', xlab='') mtext('UMAP-1',side=1, line=-.5) mtext('UMAP-2',side=2, line=-.5) # Plot proportions: gplot = ggplot(meta, aes(mouse_id, fill=label)) + geom_bar(position='fill') + scale_fill_manual(values=label.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_labels.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_labels.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(meta, aes(mouse_id, fill=sublabel)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0)) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_sublabels.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_sublabels.pdf') ,gplot, width=6, height=5) submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] # Plot proportions: gplot = ggplot(submeta, aes(mouse_id, fill=sublabel)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0)) + labs(x='Mouse ID', y='Percent of cells')+ theme_pubr() + theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'subnrn_proportions_sublabels.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'subnrn_proportions_sublabels.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(meta, aes(celltype, fill=sublabel)) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Sub-celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + theme(legend.position='right') ggsave(paste0(imgpref, 'proportions_sublabels_celltype.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_sublabels_celltype.pdf') ,gplot, width=5, height=5) # Plot proportions: gplot = ggplot(meta, aes(mouse_id, fill=celltype)) + geom_bar(position='fill') + scale_fill_manual(values=celltype.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_celltype.png') ,gplot, width=6, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_celltype.pdf') ,gplot, width=6, height=5) # Plot proportions: gplot = ggplot(submeta, aes(timepoint, fill=sublabel)) + facet_wrap(~genotype) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_geno_time_sublabel.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_geno_time_sublabel.pdf') ,gplot, width=5, height=5) # Plot proportions by has_p25 gplot = ggplot(meta, aes(has_p25, fill=sublabel)) + facet_wrap(~genotype) + geom_bar(position='fill') + scale_fill_manual(values=sublabel.col, name='Celltype:') + scale_y_continuous(expand=c(0,0), labels=scales::percent) + theme_pubr() + labs(x='Mouse ID', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) ggsave(paste0(imgpref, 'proportions_geno_hasp25_sublabel.png') ,gplot, width=5, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'proportions_geno_hasp25_sublabel.pdf') ,gplot, width=5, height=5) # -------------------------------------------------------- # Test: are the sub-neuronal fraction changes significant: # -------------------------------------------------------- submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] totct = agg.rename(cell ~ mouse_id + genotype, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), sublabel=unique(submeta$sublabel)) subct = aggregate(cell ~ sublabel + mouse_id, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('sublabel','short.sublabel')])) gplot = ggplot(subct, aes(short.sublabel, frac, fill=genotype)) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Genotype:') + scale_y_continuous(labels=scales::percent, lim=c(0,.85)) + theme_pubr() + labs(x='Celltype', y='Percent of neuronalcells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'subneuronal_prop_genotype.png') ,gplot, width=7, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'subneuronal_prop_genotype.pdf') ,gplot, width=7, height=5.5) # -------------------------------------------------------------------- # Test: are the sub-neuronal fraction changes significant across time: # -------------------------------------------------------------------- submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] totct = agg.rename(cell ~ mouse_id + genotype + timepoint, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), sublabel=unique(submeta$sublabel), timepoint=unique(submeta$timepoint)) subct = aggregate(cell ~ sublabel + mouse_id + timepoint, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('sublabel','short.sublabel')])) gplot = ggplot(subct, aes(short.sublabel, frac, fill=timepoint)) + facet_wrap(~genotype) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Time-point:') + scale_y_continuous(labels=scales::percent, lim=c(0,.85)) + theme_pubr() + labs(x='Celltype', y='Percent of neuronal cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'subneuronal_prop_genotype_timepoint.png') ,gplot, width=10, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'subneuronal_prop_genotype_timepoint.pdf') ,gplot, width=10, height=5.5) # ----------------------------------------------------- # Test: is the OPC difference in fractions significant: # ----------------------------------------------------- meta$is.glial = meta$label %in% c('Oligodendrocyte','Microglia','OPC') submeta = meta[meta$is.glial,] totct = agg.rename(cell ~ mouse_id + genotype, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), label=unique(submeta$label)) subct = aggregate(cell ~ label + mouse_id, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('label','short.label')])) gplot = ggplot(subct, aes(short.label, frac, fill=genotype)) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Genotype:') + scale_y_continuous(labels=scales::percent, lim=c(0,.85)) + theme_pubr() + labs(x='Celltype', y='Percent of glial cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'glial_prop_genotype.png') ,gplot, width=3.2, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'glial_prop_genotype.pdf') ,gplot, width=3.2, height=5.5) # ----------------------------------------------------- # Test: what is % of Stage 2 from "Undamaged" # ----------------------------------------------------- submeta = meta[meta$celltype == 'Undamaged',] totct = agg.rename(cell ~ mouse_id + genotype, submeta, length, 'tot.cell') possct = expand.grid(mouse_id=unique(meta$mouse_id), label=unique(submeta$label)) subct = aggregate(cell ~ label + mouse_id, submeta, length) subct = merge(possct, subct, all.x=TRUE) subct = merge(subct, totct) subct$cell[is.na(subct$cell)] = 0 subct$frac = subct$cell / subct$tot.cell subct = merge(subct, unique(meta[,c('label','short.label')])) gplot = ggplot(subct, aes(short.label, frac, fill=genotype)) + geom_boxplot(outlier.shape=NA) + geom_jitter(position=position_jitterdodge(jitter.width=.15, dodge.width=.75), cex=.8) + scale_fill_manual(values=c('grey85','grey50'), name='Genotype:') + scale_y_continuous(labels=scales::percent, lim=c(0,1)) + theme_pubr() + labs(x='Celltype', y='Percent of "Undamaged" cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) + stat_compare_means(method='t.test', label='p.format') ggsave(paste0(imgpref, 'stage2_prop_genotype.png') ,gplot, width=5.5, height=5.5, units='in', dpi=450) ggsave(paste0(imgpref, 'stage2_prop_genotype.pdf') ,gplot, width=5.5, height=5.5) # -------------------------------------------------------------------------- # Read in the transgene statistics and plot cell proportions by its recovery # -------------------------------------------------------------------------- tdf = read.delim(paste0(projdir, 'all_transgene_stats.tsv'), header=F) names(tdf) = c('run','well','gene','count') twide = spread(tdf, gene, count, fill=0) twide$cell = paste0(twide$run, '_', twide$well) tdf = merge(meta[,c('cell','genotype','n_counts','has_p25')], twide, all.x=TRUE) tdf$run = NULL tdf$well = NULL tdf[is.na(tdf)] = 0 gplot = ggplot(tdf, aes(p25 / n_counts, fill=genotype)) + scale_fill_manual(values=c('indianred','grey80'), name='Genotype') + scale_y_continuous(expand=c(0,0)) + scale_x_continuous(expand=c(0,0)) + labs(x = '# p25 reads / # reads') + theme_pubr() g1 = gplot + geom_histogram(position='fill', bins=40) + labs(y='Fraction of cells') g2 = gplot + geom_histogram(position='stack', bins=40) + labs(y='Number of cells') garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv', common.legend = TRUE, legend = "bottom") ggsave(paste0(imgpref, 'p25_rec_raw.png'), garr, width=4, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'p25_rec_raw.pdf'), garr, width=4, height=5) gplot = ggplot(tdf, aes(p25 / n_counts, fill=genotype)) + scale_fill_manual(values=c('indianred','grey80'), name='Genotype') + scale_y_continuous(expand=c(0,0)) + scale_x_log10(expand=c(0,0)) + labs(x = '# p25 reads / # reads') + theme_pubr() g1 = gplot + geom_histogram(position='fill', bins=40) + labs(y='Fraction of cells') g2 = gplot + geom_histogram(position='stack', bins=40) + labs(y='Number of cells') garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv', common.legend = TRUE, legend = "bottom") ggsave(paste0(imgpref, 'p25_rec_log10.png'), garr, width=4, height=5, units='in', dpi=450) ggsave(paste0(imgpref, 'p25_rec_log10.pdf'), garr, width=4, height=5) # -------------------------- # From the un-filtered data: # -------------------------- ometa = read.delim('Annotation/CKP25_mouse_metadata_010621.tsv', header=T, stringsAsFactors=F) ometa$cell = sub("_1_sequence.fastq","",ometa$fastq1) odf = merge(ometa, twide[,c('cell','p25')], all.x=TRUE) odf[is.na(odf)] = 0 gplot = ggplot(odf, aes(p25, fill=genotype)) + scale_fill_manual(values=c('indianred','grey80'), name='Genotype') + scale_y_continuous(expand=c(0,0)) + scale_x_continuous(expand=c(0,0)) + labs(x = '# p25 reads / # reads') + geom_histogram(position='stack', bins=50)+ theme_pubr() g1 = gplot + geom_histogram(position='fill', bins=40) + labs(y='Fraction of cells') # Try by cell location: # -------------------------------- # Plot the counts along the plate: # -------------------------------- # Estimate plate locations: tdf$loc = sapply(tdf$cell, function(x){nx=nchar(x); as.numeric(substr(x, nx-2, nx))}) tdf$plate = 1 * (tdf$loc > 96) + 1 tdf$run = paste0(sub("_.*","", tdf$cell), '_',tdf$plate) tdf$loc = tdf$loc %% 96 tdf$loc[tdf$loc == 0] = 96 nr = 12 nc = 8 tdf$col = tdf$loc %% nr tdf$col[tdf$col == 0] = nr tdf$row = (tdf$loc - tdf$col) / nr unique(tdf[,c('genotype','run')]) gplot = ggplot(tdf, aes(col, row, fill=p25)) + facet_wrap(~run) + scale_y_continuous(expand=c(0,0)) + scale_x_continuous(expand=c(0,0)) + scale_fill_distiller(palette='Blues', direction=1) + geom_tile() + labs(x='column',y='row') + theme_pubr() ggsave(paste0(imgpref, 'p25_on_plates.png'), gplot, width=8, height=8, units='in', dpi=450) ggsave(paste0(imgpref, 'p25_on_plates.pdf'), gplot, width=8, height=8) # -------------------------------------- # Statistics on gene fusion percentages: # -------------------------------------- submeta = meta[meta$label %in% c('Excitatory','Inhibitory','Stage 2'),] fusiondf = read.delim(paste0(projdir, 'allprojid_aggregated.starfusion.pred.tsv'), header=T) names(fusiondf)[1] = 'cell' # Analysis at intra-chromosomal level: intradf = fusiondf[fusiondf$'annots.1' == 'INTRACHROMOSOMAL',] intradf$lpos = as.numeric(sub(":[-+]","",sub("chr[0-9XYM]*:","",intradf$LeftBreakpoint))) intradf$rpos = as.numeric(sub(":[-+]","",sub("chr[0-9XYM]*:","",intradf$RightBreakpoint))) intradf$dist = abs(intradf$lpos - intradf$rpos) mean(intradf$dist < 1e6) # 79% of intra are within 1Mb of each other. intradf$close.fusion = intradf$dist < 1e6 idf = aggregate(close.fusion ~ cell, intradf, sum) idf = merge(meta, idf, all.x=TRUE) idf$close.fusion[is.na(idf$close.fusion)] = 0 idf$has.fusion = idf$close.fusion > 0 aidf = aggregate(cell ~ has.fusion + short.sublabel + genotype, idf, length) possct = expand.grid(mouse_id=unique(meta$mouse_id), sublabel=unique(meta$sublabel)) gplot = ggplot(aidf, aes(short.sublabel, cell, fill=has.fusion)) + facet_wrap(~genotype) + scale_fill_manual(values=c('grey85','slateblue4'), name='Has Intra-chromosomal Fusion < 1Mb:') + scale_y_continuous(expand=c(0,0)) + theme_pubr() + labs(x='Sub-celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) g1 = gplot + geom_bar(stat='identity', position='stack') + labs(y='Number of cells', x='') g2 = gplot + geom_bar(stat='identity', position='fill') + labs(y='Fraction of cells') garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv', common.legend = TRUE, legend = "top") ggsave(paste0(imgpref, 'intrafusion_subcelltype.png'), garr, width=5, height=6, units='in', dpi=450) ggsave(paste0(imgpref, 'intrafusion_subcelltype.pdf'), garr, width=5, height=6) # Test for enrichment of Stage 2: mat = matrix(c(nrow(idf), sum(idf$has.fusion), sum(idf$label == 'Stage 2'), sum(idf$label == 'Stage 2' & idf$has.fusion)), 2,2) fisher.test(mat) # Just in ckp25 mice idf2 = idf[idf$genotype == 'CKp25' & idf$sublabel %in% c('Ex0','Ex1','Ex2','Ex3','In0','In1','Stage 2'),] mat = matrix(c(nrow(idf2), sum(idf2$has.fusion), sum(idf2$label == 'Stage 2'), sum(idf2$label == 'Stage 2' & idf2$has.fusion)), 2,2) fisher.test(mat) # Test for differences between time-points: mat = matrix( c(sum(idf$label == 'Stage 2'), sum(idf$label == 'Stage 2' & idf$has.fusion), sum(idf$label == 'Stage 2' & idf$timepoint == '2wk'), sum(idf$label == 'Stage 2' & idf$has.fusion & idf$timepoint == '2wk')) , 2,2) fisher.test(mat) # ------------------------------------ # Analysis at inter-chromosomal level: # ------------------------------------ interdf = fusiondf[fusiondf$'annots.1' == 'INTERCHROMOSOMAL',] interdf$inter.fusion = 1 # Prune fusions: interdf = interdf[grep("Snhg11",interdf$X.FusionName,invert=TRUE),] idf = aggregate(inter.fusion ~ cell, interdf, sum) idf = merge(meta, idf, all.x=TRUE) idf$inter.fusion[is.na(idf$inter.fusion)] = 0 idf$has.fusion = idf$inter.fusion > 0 aidf = aggregate(cell ~ has.fusion + short.sublabel + genotype, idf, length) possct = expand.grid(mouse_id=unique(meta$mouse_id), sublabel=unique(submeta$sublabel)) gplot = ggplot(aidf, aes(short.sublabel, cell, fill=has.fusion)) + facet_wrap(~genotype) + scale_fill_manual(values=c('grey85','slateblue4'), name='Has Inter-chromosomal Fusion:') + scale_y_continuous(expand=c(0,0)) + theme_pubr() + labs(x='Sub-celltype', y='Percent of cells')+ theme(axis.text.x=element_text(angle=90,vjust=.5, hjust=1)) g1 = gplot + geom_bar(stat='identity', position='stack') + labs(y='Number of cells', x='') g2 = gplot + geom_bar(stat='identity', position='fill') + labs(y='Fraction of cells') garr = ggarrange(g1, g2, ncol=1, nrow=2, align='hv', common.legend = TRUE, legend = "top") ggsave(paste0(imgpref, 'interfusion_subcelltype.png'), garr, width=5, height=6, units='in', dpi=450) ggsave(paste0(imgpref, 'interfusion_subcelltype.pdf'), garr, width=5, height=6) # Test for enrichment of Stage 2: mat = matrix(c(nrow(idf), sum(idf$has.fusion), sum(idf$label == 'Stage 2'), sum(idf$label == 'Stage 2' & idf$has.fusion)), 2,2) fisher.test(mat) mat[2,1] = mat[2,1] + 65 mat[2,2] = mat[2,2] + 36 fisher.test(mat) # Just in ckp25 mice idf2 = idf[idf$genotype == 'CKp25' & idf$sublabel %in% c('Ex0','Ex1','Ex2','Ex3','In0','In1','Stage 2'),] mat = matrix(c(nrow(idf2), sum(idf2$has.fusion), sum(idf2$label == 'Stage 2'), sum(idf2$label == 'Stage 2' & idf2$has.fusion)), 2,2) fisher.test(mat) mat[2,1] = mat[2,1] + 54 mat[2,2] = mat[2,2] + 36 fisher.test(mat)