#!/usr/bin/R # -------------------------------------------- # Load metadata for the human fusions project: # -------------------------------------------- library(cbrbase) library(tidyr) library(RColorBrewer) set_proj('DEVTRAJ', 'mosaicism') options(width=170) # Read in the batch metadata files: # --------------------------------- # Number the pfc samples: pt.dbdir = sub("DEVTRAJ","perturbseq",dbdir) pfc.df = readRDS(paste0(pt.dbdir, 'snPFC/batches/full_batch_mapping.Rds')) names(pfc.df)[1] = 'sampind' # Read in the 10x fusions metadata: fus.meta = read.delim('fusions_AD430/metadata/sample_table_forfusions.tsv', header=F) fus.meta = fus.meta[,c(1,2,3,4,5,7)] names(fus.meta) = c('projid', 'prefix','library_id', 'nlane', 'repl', 'sample') # Extract appropriate batch for each of the samples: # -------------------------------------------------- fus.meta$batch = sub(".*(SM-[A-Z0-9]{5}).*", "\\1", fus.meta$prefix) fus.meta$batch = sub("lhtsai_.*hansruedi_", "", fus.meta$batch) fus.meta$batch = sub("_Folder.*", "Tsa", fus.meta$batch) fus.meta$batch = sub("190918Tsa_last_16_pfc.*", "SM-Last16", fus.meta$batch) fus.meta$batch = sub("[/_].*", "", fus.meta$batch) fus.meta$batch[fus.meta$batch == "190520TsaB"] = 'SM-IHLAS' fus.meta$batch = sub("SM-", "SM_", fus.meta$batch) fus.meta$batch = sub("^1", "SM_1", fus.meta$batch) # Remap the re-sequenced files to proper batch: mapbatch = c("SM_190501Tsa", "SM_190516TsaA") ldf = fus.meta[fus.meta$batch %in% mapbatch, 'library_id', drop=F] ldf = merge(fus.meta[,c('library_id','batch')], ldf) ldf = ldf[!(ldf$batch %in% mapbatch),] names(ldf)[2] = 'batch2' fus.meta = merge(fus.meta, ldf, all.x=TRUE) ind = !is.na(fus.meta$batch2) fus.meta$batch[ind] = fus.meta$batch2[ind] fus.meta$batch2 = NULL # Merge with PFC data; all accounted for: fus.meta = merge(fus.meta, pfc.df, all.x=TRUE) dim(fus.meta[is.na(fus.meta$dataset),]) # Only MRPFC data is not added; that's ok. testdf = merge(fus.meta, pfc.df, all.y=TRUE) sum(is.na(testdf$prefix)) kept.individuals = unique(fus.meta$projid) print(paste(length(kept.individuals), 'unique projids')) print(paste(length(unique(fus.meta$sample)), 'unique datasets')) # Read in the cell-level metadata: # -------------------------------- cellmeta.rds = paste0(dbdir, 'PFC_HC_cell_metadata_for_fusions_AD430.Rds') if (!file.exists(cellmeta.rds)){ # Read PFC metadata: cellmeta = readRDS(paste0(pt.dbdir, 'snPFC/consensus_annot_snPFC.joint_annotation_metadata.Rds')) cellmeta = cellmeta[cellmeta$dataset %in% fus.meta$dataset,] cellmeta$barcode = sub(".*_","", cellmeta$obsnames) cellmeta$sampind = sub(".*-","", cellmeta$barcode) cellmeta = cellmeta[,c('dataset', 'barcode', 'major.celltype', 'cell_type_high_resolution', 'n_counts', 'n_genes', 'pct_ribo', 'pct_mito')] names(cellmeta)[3:4] = c('celltype', 'cthr') cellmeta$barcode = with(cellmeta, paste0(dataset, ':', sub("-.*","", barcode))) saveRDS(cellmeta, file=cellmeta.rds) } else { cellmeta = readRDS(cellmeta.rds) } # Read in the overall metadata: # ----------------------------- indmeta.proc.rds = 'Annotation/metadata_PFC_all_individuals_092520.addedVars.rds' indmeta_tsv = 'Annotation/metadata_PFC_all_individuals_092520.tsv' if (!file.exists(indmeta.proc.rds)){ metadata = read.delim(indmeta_tsv, header=T) metadata = metadata[metadata$projid %in% kept.individuals,] metadata$bd = 'CTRL' metadata$bd[metadata$braaksc %in% c('5','6')] = 'AD' metadata$bd2 = NA metadata$bd2[metadata$braaksc %in% c('0','1','2')] = 'CTRL' metadata$bd2[metadata$braaksc %in% c('5','6')] = 'AD' metadata$nrad = 'CTRL' metadata$nrad[metadata$niareagansc %in% c('1','2')] = 'AD' metadata$cogdx.ad = 'Low/Mild' metadata$cogdx.ad[metadata$cogdx %in% c('4','5')] = 'AD' metadata$Apoe_e4 = ifelse(metadata$apoe_genotype %in% c('24','44','34'), 'Yes','No') metadata$age_rescaled = metadata$age_death / 10 metadata$pmi_rescaled = metadata$pmi / 10 # Order variables: metadata$bd = factor(metadata$bd, levels=c('CTRL','AD')) metadata$bd2 = factor(metadata$bd2, levels=c('CTRL','AD')) metadata$nrad = factor(metadata$nrad, levels=c('CTRL','AD')) metadata$cogdx.ad = factor(metadata$cogdx.ad, levels=c('Low/Mild','AD')) # Save metadata: saveRDS(metadata, file=indmeta.proc.rds) } else { metadata = readRDS(indmeta.proc.rds) } # Load colors: source(paste0(bindir, 'multiRegion/set_colors.R'))