#!/usr/bin/R # ----------------------------------------------------------- # Load the predicted fusion data for the ALZ/ALZ_repl cohorts # Updated: 08/07/22 # ------------------------------------------------ library(cbrbase) set_proj('DEVTRAJ') # Libraries: library(tidyr) options(width=170) # Load in and process data (saves to matrices): # project = 'ALZ_repl' project = 'ALZ' commandArgs <- function(){ c(project)} source(paste0(bindir, 'mosaicism/load_metadata.R')) source(paste0(bindir, 'mosaicism/genefusions/auxiliary_fusion_plotting.R')) # Load in the two cohorts of candidate gene fusion counts: # -------------------------------------------------------- outpref = paste0(dbdir,'variants/ALZ_joint_fusion_datasets_') filesuff = 'maxsensitivity_prediction_numbers_wmeta' fssuff = 'maxsensitivity_abridged_fusions_filtered' count.tsv = paste0(outpref, filesuff, '.tsv') fusion.tsv = paste0(outpref, fssuff, '.tsv') ctdf = read.delim(count.tsv, header=T) fsdf = read.delim(fusion.tsv, header=T) # Rename + order variables: # ------------------------- ctdf$bd[ctdf$bd == '1-4'] = 'CTRL' ctdf$bd[ctdf$bd == '5-6'] = 'AD' ctdf$bd = factor(ctdf$bd, levels=c('CTRL','AD')) ctdf$bd2 = factor(ctdf$bd2, levels=c('CTRL','AD')) ctdf$nrad = factor(ctdf$nrad, levels=c('CTRL','AD')) ctdf$cogdx.ad = factor(ctdf$cogdx.ad, levels=c('Low/Mild','AD')) # Alternative regression vars: ctdf$has_fusion = ctdf$count > 0 ctdf$capped_count = ctdf$count ctdf$capped_count[ctdf$capped_count > 10] = 10 ctdf$rate = ctdf$count / ctdf$n_counts ctdf$capped_rate = ctdf$capped_count / ctdf$n_counts reg.vars = c('has_fusion', 'count', 'capped_count', 'rate', 'capped_rate') ctdf$age_rescaled = ctdf$age_death / 10 ctdf$pmi_rescaled = ctdf$pmi / 10 # Subset: # countcutoff = 25000 countcutoff = 0 sub.ctdf = ctdf[ctdf$n_counts >= countcutoff,] sub.ctdf = sub.ctdf[sub.ctdf$relabel != 'Other/Cancer',] # Regression estimates for a number of selected covariates: # ------------------------------------------------ advars = c('niareagansc','nrad','braaksc','bd', 'bd2','cogdx.ad') covars = c('age_rescaled','msex','pmi_rescaled', 'Apoe_e4') test.vars= c('cogdx.ad','bd','nrad', covars) ext.covar = '+ pmi_rescaled + msex + Apoe_e4 + age_rescaled + percent_mito + percent_ribo' ext.covar = '+ pmi_rescaled + msex + age_rescaled + percent_mito + percent_ribo' # NOTE: n_counts better models than log(n_counts) # count.covar = '+ n_counts + n_genes' # Without platform interaction count.covar = '+ platform * n_counts + platform* n_genes + platform * count_per_gene' # Variable mappings: varmap = c( 'msex' = 'Sex (M)', 'nradAD' = 'NIA-Reagan score (1-2)', 'bd2AD' = 'Braak Stage (3-6)', 'bdAD' = 'Braak Stage (5-6)', 'Apoe_e4Yes' = 'Has APOE-e4', 'cogdx.adAD' = 'AD Cog. Imp.', 'pmi_rescaled' = 'PMI', 'age_rescaled' = 'Age of death')