#!/usr/bin/R # ------------------------------------- # Join processed fusions with metadata: # --follows P301S, updated for human # Updated: 01/11/23 # ------------------------------------- library(cbrbase) library(ggplot2) library(ggpubr) set_proj('DEVTRAJ', 'mosaicism') source(paste0(bindir, 'mosaicism/fusions_AD430/load_metadata.R')) source(paste0(bindir, 'multiRegion/auxiliary_plotting_settings.R')) # Directories: plotdir = paste0(imgdir, 'fusions/') imgpref = paste0(plotdir, 'AD430_fusions_') cmd = paste('mkdir -p', plotdir) system(cmd) # Load the preliminary fusion files: # ---------------------------------- fsdir = paste0(dbdir, 'fusions_AD430/') prefix = 'AD430.aggregated.fusion_predictions' predfile = paste0(fsdir, prefix, '.tsv') bcfile = paste0(fsdir, prefix, '.byread.withBC.tsv') procfile = paste0(fsdir, prefix, '.processed.tsv') withctfile = paste0(fsdir, prefix, '.processed.withct.tsv') countfile = paste0(fsdir, prefix, '.processed_counts.tsv') # Read files: fsdf = read.delim(predfile, header=T) bcdf = read.delim(bcfile, header=T) merged.fsdf = read.delim(procfile, header=T) # 31,402 events ## THESE ARE ANNOT FROM FUSIONS10x # Merge with sample-level metadata: merged.fsdf = merge(merged.fsdf, unique(fus.meta[,c('sample','dataset')]), all.x=TRUE) merged.fsdf$barcode = with(merged.fsdf, paste0(dataset, ':', CB)) # Remove non-mapped datasets: merged.fsdf = merged.fsdf[!is.na(merged.fsdf$dataset),] # 29,358 events # Gauge recovery, should be about 50% (59%) merged.fsdf$rec.barcode = merged.fsdf$barcode %in% cellmeta$barcode recdf = aggregate(rec.barcode ~ dataset, merged.fsdf, mean) recdf = recdf[order(recdf$rec.barcode),, drop=F] # Create count var: ctdf = agg.rename(annots ~ barcode + dataset, merged.fsdf, length, 'count') cellmeta = merge(cellmeta, unique(fus.meta[,c('dataset','projid')])) ctdf = merge(cellmeta, ctdf, all.x=TRUE) ctdf$count[is.na(ctdf$count)] = 0 # Merge the celltype into merged.fsdf and write out for additional stats: merged.fsdf = merge(merged.fsdf, cellmeta[,c('barcode', 'celltype', 'cthr')]) write.table(merged.fsdf, withctfile, quote=F, row.names=F, sep="\t") # Update the metadatafor merging with the fusions: # -- same as gene fusions # ------------------------------------------------- metadata$bd = 'CTRL' metadata$bd[metadata$braaksc %in% c('5','6')] = 'AD' metadata$bd2 = NA metadata$bd2[metadata$braaksc %in% c('0','1','2')] = 'CTRL' metadata$bd2[metadata$braaksc %in% c('5','6')] = 'AD' metadata$nrad = 'CTRL' metadata$nrad[metadata$niareagansc %in% c('1','2')] = 'AD' metadata$cogdx.ad = 'Low/Mild' metadata$cogdx.ad[metadata$cogdx %in% c('4','5')] = 'AD' metadata$Apoe_e4 = ifelse(metadata$apoe_genotype %in% c('24','44','34'), 'Yes','No') # Merge with individual-level metadata: # ------------------------------------- advars = c('niareagansc','nrad','braaksc','bd', 'bd2','cogdx.ad') covars = c('age_death','msex','pmi', 'Apoe_e4') mcols = c('projid', advars, covars) ctdf = merge(ctdf, unique(metadata[,mcols])) ctdf$count_per_gene = with(ctdf, n_counts / n_genes) write.table(ctdf, countfile, quote=F, sep='\t', row.names=F)