#!/usr/bin/R # ------------------------------------------------ # Plot the markers and read counts for revision QC # Updated: 03/28/23 # ------------------------------------------------ library(cbrbase) set_proj('DEVTRAJ') # Libraries: library(tidyr) library(MASS) library(ggplot2) library(ggpubr) library(ggrepel) library(ggbeeswarm) library(scales) library(RColorBrewer) library(Matrix) options(width=170) # Load in the two cohorts of candidate gene fusion counts: # -------------------------------------------------------- gbdir = paste0(bindir, 'mosaicism/genefusions/') # source(paste0(gbdir, 'load_fusion_data.R')) # source(paste0(gbdir, 'auxiliary_fusion_plotting.R')) # source(paste0(gbdir, 'load_go_signatures.R')) source(paste0(gbdir, 'load_joint_expression.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Make directories for analysis: # ------------------------------ analysis = 'fusion' project = 'ALZ' imgdir = paste0(img, project, '/', analysis , '/') imgpref = paste0(imgdir, 'qcstats_', project, '_') cmd = paste0('mkdir -p ', imgdir) system(cmd) # Subset to used cohort: meta = meta[meta$cohort == 'ALZ',] meta = meta[meta$relabel != 'Other/Cancer',] kept.cells = meta$sample # Plot the basic QC statistics: # ----------------------------- # Aggregate the relevant statistics: meta$log10_n_counts = log10(meta$n_counts) statvars = c('n_genes', 'log10_n_counts', 'percent_mito', 'percent_ribo', 'count_per_gene') df = gather(meta[,c('relabel', 'sample', statvars)], var, value, -relabel, -sample) relabel.col['Undefined/Dying'] = 'grey75' # Add comparisons: df$relabel = factor(df$relabel, levels=rev(levels(meta$relabel))) gp = ggplot(df, aes(relabel, value, fill=relabel)) + facet_wrap(~var, scales='free_x') + scale_fill_manual(values=relabel.col) + geom_boxplot(outlier.size=.5) + coord_flip() + theme_pubr() pltprefix = paste0(imgpref, 'stats_barplot') saveGGplot(gp, pltprefix, w=10, h=5) # Load in the marker genes, top 2 per class: # ------------------------------------------ broad.df = read.delim(paste0(dbdir, 'Annotation/broad_ct_markers.tsv'), header=T) markerdf = read.delim(paste0(dbdir, 'Annotation/markers_brain_major_celltypes.tsv'), header=T) clusters = c('CAMs', 'Per', 'SMC', 'T cells', 'Fib') marklist = lapply(clusters, function(x, ntop=2){ subdf = markerdf[markerdf$cluster == x,] head(subdf[,c('cluster','gene')], ntop) }) markerdf = do.call(rbind, marklist) names(markerdf) = c('cell', 'gene') markerdf = rbind(broad.df, markerdf) # Plot the marker gene expressions: # --------------------------------- tform = make.tform(meta$relabel, u=as.character(unique(meta$relabel))) mat = as.matrix(fullmat[, meta$sample] %*% tform) # library size normalization: norm = log(sweep(mat, 2, colSums(mat) / 1e6, '/') + .1) marker.ensg = anno$ENSG[anno$symbol %in% markerdf$gene] mark.norm = norm[marker.ensg,] rownames(anno) = anno$ENSG rownames(mark.norm) = anno[rownames(mark.norm), 'symbol'] mark.norm = mark.norm[markerdf$gene,] pltmat = t(mark.norm) load.colors() ux = 2 ht = Heatmap(pltmat, viridis(100), width = ncol(pltmat)*unit(ux, "mm"), height = nrow(pltmat)*unit(1.5, "mm"), # Splits are too much column_split=markerdf$cell, border_gp=gpar(color='black', lwd=.5), cluster_rows=FALSE, use_raster=FALSE) h = 2 + 1 / 15 * nrow(pltmat) w = 2 + 1 / 15 * ncol(pltmat) pltpref = paste0(imgpref, 'markergenes_heatmap') saveHeatmap(ht, pltpref, h=h, w=w)