#!/usr/bin/R # ------------------------------------------------------------- # Check that the HC barcodes map properly with the fusion data: # - in case that the sample numbers are misaligned # Updated: 01/12/23 # ------------------------------------------------------------- library(cbrbase) library(ggplot2) library(ggpubr) options(width=170) set_proj('DEVTRAJ', 'mosaicism') source(paste0(bindir, 'mosaicism/fusions_human10x/load_metadata.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Directories: plotdir = paste0(imgdir, 'metadata/') imgpref = paste0(plotdir, 'human10x_') cmd = paste('mkdir -p', plotdir) system(cmd) # Load the preliminary fusion file: # --------------------------------- fsdir = paste0(dbdir, 'variants/lineage/') prefix = 'human10x.aggregated.fusion_predictions' procfile = paste0(fsdir, prefix, '.processed.tsv') merged.fsdf = read.delim(procfile, header=T) # 31,402 events # Subset fusion data to hippocampus data: merged.fsdf = merge(merged.fsdf, unique(fus.meta[,c('sample','region', 'dataset')])) merged.fsdf = merged.fsdf[merged.fsdf$region == 'HC',] # Subset cellmeta to HC data: cellmeta$batch = sub("-.*", "", cellmeta$dataset) hc.cellmeta = cellmeta[cellmeta$batch == 'HC',] hc.cellmeta$CB = sub(".*:", "", hc.cellmeta$barcode) # Look at all combinations of CB to CB across datasets: # ----------------------------------------------------- # Match up: t1 = unique(merged.fsdf$dataset) t2 = unique(hc.cellmeta$dataset) jmat = matrix(0, nrow=length(t1), ncol=length(t2), dimnames=list(t1, t2)) for (st1 in t1){ seqs1 = merged.fsdf$CB[merged.fsdf$dataset == st1] for (st2 in t2){ seqs2 = hc.cellmeta$CB[hc.cellmeta$dataset == st2] lint = length(intersect(seqs1, seqs2)) # lunion = length(seqs1) + length(seqs2) - lint # jacc = lint / lunion jacc = lint / length(seqs1) jmat[st1, st2] = jacc # cat(st1, '\t', st2, '\t', jacc, '\n') } } ind = apply(jmat, 1, which.max) mdf = data.frame(fs=names(ind), cell=colnames(jmat)[ind]) sum(mdf$fs != mdf$cell) # Match exactly # Plot heatmap of overlaps: ht = Heatmap(jmat, col=col1, width=ncol(jmat) * unit(2, 'mm'), height=nrow(jmat) * unit(2, 'mm'), cluster_rows=FALSE, cluster_columns=FALSE ) w = ncol(jmat) / 15 + 2 h = nrow(jmat) / 15 + 2 saveHeatmap(ht, paste0(imgpref, 'hippocampus_fusions_barcode_agreement'), w=w, h=h)