#!/usr/bin/R # ---------------------------------------------------- # Compare the regression models, as per R2 suggestions # Updated: 02/21/23 # ---------------------------------------------------- library(cbrbase) set_proj('DEVTRAJ') # Libraries: library(tidyr) library(ggplot2) library(ggpubr) library(RColorBrewer) options(width=170) # Load in and process data (saves to matrices): project = 'ALZ' commandArgs <- function(){ c(project)} source(paste0(bindir, 'mosaicism/load_metadata.R')) # Make directories for analysis: analysis = 'fusion' imgdir = paste0(img, project, '/', analysis , '/') imgpref = paste0(imgdir, analysis, "_mspred_", project, '_') cmd = paste0('mkdir -p ', imgdir) system(cmd) # Load in the two cohorts of candidate gene fusion counts: # -------------------------------------------------------- source(paste0(bindir, 'mosaicism/genefusions/load_fusion_data.R')) source(paste0(bindir, 'mosaicism/genefusions/auxiliary_fusion_plotting.R')) coh.covar = '+ pmi_rescaled + msex + age_rescaled' for (only.exc in c(TRUE, FALSE)){ coh.ctdf = sub.ctdf[sub.ctdf$cohort == 'ALZ',] if (only.exc){ df = coh.ctdf[coh.ctdf$relabel == 'Excitatory',] } else { df = coh.ctdf} resdf = c() for (var in test.vars){ fit.pois = list() fit.nb = list() for (use.model in c('original', 'proposed', 'final')){ if (use.model == 'original'){ # Original model, with + n_counts + n_genes count.covar = '+ n_counts + n_genes + count_per_gene' form.vec = c(reg.var, ' ~ ', var, count.covar, coh.covar) } else if (use.model == 'proposed'){ # Reviewer proposed model, with + offset(log(n_counts)) + n_genes count.covar = '+ n_genes + count_per_gene' form.vec = c(reg.var, ' ~ offset(log(n_counts)) + ', var, count.covar, coh.covar) } else if (use.model == 'final'){ # Final model, with offset(log(n_counts)) as well as log(n_genes) count.covar = '+ log(n_genes) + count_per_gene' form.vec = c(reg.var, ' ~ offset(log(n_counts)) + ', var, count.covar, coh.covar) } if (!only.exc){ form.vec = c(form.vec, ' + relabel') } form = asform(c(form.vec)) if (reg.var %in% c('capped_count','count')){ # Comparison here of poisson vs. NB model fit.pois[[use.model]] = glm(form, df, family='poisson') fit.nb[[use.model]] = glm.nb(form, df) } else if (reg.var %in% c('has_fusion')){ fit = glm(form, df, family='binomial') } else { fit = glm(form, df, family='gaussian') } cfit = processLmFit(fit) cfit$var = var resdf = rbind(resdf, cfit[2,]) } lrtest(fit.pois$original, fit.nb$original) lrtest(fit.pois$original, fit.pois$proposed) lrtest(fit.nb$original, fit.nb$proposed) lrtest(fit.nb$proposed, fit.nb$final) lrtest(fit.pois$original, fit.nb$final) # NB final is best model, with -6428.5 LLK and AIC 12887 } }