We first want to install all of the packages used in this analysis. We use the package renv
as a way to manage package versions. The following code was initially run to set up the package library.
# One-time setup: initialise a bare renv project, install every package used in
# this analysis, and snapshot the resulting package library.
install.packages("renv")
library(renv)
renv::init(bare = TRUE)
install.packages("BiocManager")
library(BiocManager)
# BiocManager::repositories() supplies the repositories to install from.
# The argument is spelled out as 'repos' rather than relying on partial
# matching of the abbreviated name 'repo'.
install.packages(
  c("ggplot2", "magrittr", "knitr", "kableExtra", "ggrepel",
    "stringr", "circlize", "ComplexHeatmap", "metap", "limma",
    "devtools", "rmarkdown", "formatR", "bitops", "caTools"),
  repos = BiocManager::repositories()
)
# hypeR is installed from its GitHub repository
devtools::install_github("montilab/hypeR")
renv::snapshot()
If you are attempting to reproduce this analysis, you can 1) install/load renv, 2) copy/paste the renv.lock file into your current working directory, and 3) run the renv::restore() command to automatically install all of the packages with the same versions.
# Rebuild the package library from the lockfile.
# The 'renv.lock' file must be present in the current working directory.
install.packages(pkgs = "renv")
library("renv")
renv::restore()
First, R packages will be loaded and scripts will be sourced.
source("lm_utils.R")
source("lm_expression_with_cnv.R")
library(ggplot2)
library(magrittr)
library(knitr)
library(kableExtra)
library(ggrepel)
library(hypeR)
library(stringr)
library(circlize)
library(ComplexHeatmap)
library(metap)
library(limma)
# Default figure dimensions for all chunks
knitr::opts_chunk$set(fig.width = 12, fig.height = 8)
# Create the output directories; no warning is raised if they already exist
invisible(lapply(
  c("Enrichment_Results", "LM_Results"),
  dir.create,
  showWarnings = FALSE
))
We need to read in the expression matrix, ABSOLUTE calls, and copy ratio matrix. The expression matrix was obtained from the following URL and then gzipped:
http://api.gdc.cancer.gov/data/3586c0da-64d0-4b74-a449-5ff4d9136611
The ABSOLUTE purity/ploidy estimates were obtained from:
http://api.gdc.cancer.gov/data/4f277128-f793-4354-a13d-30cc7fe9f6b5
The copy ratio matrix was obtained by running GISTIC2.0 on the seg file obtained from:
http://api.gdc.cancer.gov/data/00a32f7a-c85f-4f86-850d-be53973cbc4d
Clinical data with tumor type information was obtained from:
http://api.gdc.cancer.gov/data/0fc78496-818b-4896-bd83-52db1f533c5c
# Load the RNA-seq expression table (genes in rows, samples in columns)
expression = read.table(
  gzfile("../../Data/EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.tsv.gz"),
  sep = "\t",
  header = TRUE,
  row.names = 1,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Split each row name on "|"; the first field is used as the gene name
expression.gene.name = t(as.data.frame(strsplit(rownames(expression), "\\|")))
# Flag rows named "?" or "SLC35E2" for removal
# NOTE(review): presumably these names are not unique identifiers -- confirm
i = expression.gene.name[, 1] %in% c("?", "SLC35E2")
# log2(x + 1) transform of the retained rows, re-labelled by gene name
expression = log2(expression[!i, ] + 1)
rownames(expression) = expression.gene.name[!i, 1]
# Truncate sample identifiers to their first 15 characters
colnames(expression) = substr(colnames(expression), 1, 15)
# Load the ABSOLUTE calls; the first column supplies the row names
absolute = read.table(
  "../../../Mutation/Data/TCGA_mastercalls.abs_tables_JSedit.fixed.txt",
  sep = "\t",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)
# WGD status: "Yes" when Genome.doublings > 0, otherwise "No" ("No" is the
# reference level); missing values stay missing
absolute$WGD_Status = factor(
  absolute$Genome.doublings > 0,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
# Load the per-gene copy ratio table produced by GISTIC
cnv = read.table(
  gzfile("../../Data/all_data_by_genes_whitelisted.tsv.gz"),
  sep = "\t",
  header = TRUE,
  row.names = 1,
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# The first two columns are kept aside as gene annotation; the remaining
# columns are converted to a matrix
cnv.info = cnv[, c(1, 2)]
cnv = as.matrix(cnv[, -c(1, 2)])
# Truncate sample identifiers to their first 15 characters
colnames(cnv) = substr(colnames(cnv), 1, 15)
# Extract the part of the second annotation column that precedes the p/q arm
# letter and store it as a factor with levels 1-22, X, Y
chrom = str_match(cnv.info[, 2], "^(\\w+)[pq]")
cnv.info$Chrom = factor(chrom[, 2], levels = c(1:22, "X", "Y"))
# Load the clinical table; the second column supplies the row names
clin = read.table(
  gzfile("../../Data/clinical_PANCAN_patient_with_followup.tsv.gz"),
  sep = "\t",
  header = TRUE,
  quote = "",
  row.names = 2,
  stringsAsFactors = FALSE
)
# Keep only patients with a tumor type (acronym) recorded
clin = clin[!is.na(clin$acronym), , drop = FALSE]
# Tumors present in all three assays (expression, ABSOLUTE, copy number)
tumor.overlap = Reduce(
  intersect,
  list(colnames(expression), rownames(absolute), colnames(cnv))
)
# Subset to primary tumors (characters 14-15 of the sample ID equal "01")
tumor.overlap = tumor.overlap[substr(tumor.overlap, 14, 15) == "01"]
# Subset to tumors with an annotated tumor type (the first 12 characters of
# the sample ID must be a row name of the clinical table)
tumor.overlap = tumor.overlap[substr(tumor.overlap, 1, 12) %in% rownames(clin)]

# Genes shared by the expression and copy number assays
gene.overlap.initial = intersect(rownames(expression), rownames(cnv))
# Fraction of the selected tumors in which each gene is missing or zero
percent.absent = local({
  expression.selected = expression[, tumor.overlap]
  absent = is.na(expression.selected) | expression.selected == 0
  rowSums(absent) / length(tumor.overlap)
})
# Keep shared genes that are missing/zero in fewer than 90% of the tumors,
# i.e. expressed in more than 10% of them
gene.overlap = intersect(
  rownames(expression)[percent.absent < 0.90],
  rownames(cnv)
)

# Restrict every assay to the shared genes and tumors
cnv.overlap = cnv[gene.overlap, tumor.overlap]
expression.overlap = expression[gene.overlap, tumor.overlap]
absolute.overlap = absolute[tumor.overlap, ]
cnv.info.overlap = cnv.info[gene.overlap, ]
# Tumor type of each retained sample, looked up by the first 12 ID characters
tumor.type = clin[substr(colnames(expression.overlap), 1, 12), "acronym"]

# The full-size tables are no longer needed
rm(expression, cnv)
We will iterate through all tumor types and identify genes that are differentially expressed with respect to WGD status while controlling for purity and local copy number.
# Run the per-gene model separately within each tumor type
tumor.type.ids = unique(tumor.type)
result = list()
for (i in tumor.type.ids) {
  cat("Analyzing", i, "...\n")

  # Samples belonging to the current tumor type
  ix = tumor.type == i

  # Sample-level covariates passed to the model alongside expression and CNV
  covars = absolute.overlap[ix, c("WGD_Status", "purity")]
  temp.res = lm_expression_with_cnv(
    expression.overlap[, ix],
    cnv.overlap[, ix],
    covars
  )

  # Write this tumor type's table to a gzipped, tab-delimited file
  fn = paste0("LM_Results/LM_Exp_tilde_CNV_WGD_Purity_", i, ".txt.gz")
  write.table(
    temp.res,
    file = gzfile(fn),
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )

  # Keep the table in memory as well, keyed by tumor type
  result[[i]] = temp.res
}
## Analyzing ACC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing BLCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing DLBC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing UCEC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing HNSC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing PRAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing KIRP ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing PAAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing SARC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing CESC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing COAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LUSC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing READ ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing KIRC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LIHC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing BRCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing UCS ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing GBM ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing KICH ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing THCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LGG ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LUAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing MESO ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing PCPG ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing TGCT ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing UVM ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing THYM ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing CHOL ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing OV ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing ESCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing STAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
A heatmap will be generated for the top 50 up- and down-regulated genes using the ComplexHeatmap
package. Each tumor type is plotted on a separate tab.
# Maximum number of up-regulated and of down-regulated genes shown per tumor type
n.show = 50

# Set up column color bars: purity is mapped onto a color ramp over [0, 1]
purity_col = colorRamp2(c(0, 25, 50, 75, 100)/100, c("blue", "green", "yellow", "orange", "red"))

# Set up row color bars: one randomly drawn (seeded) color per chromosome
# level, so the number of colors always matches the number of names assigned
set.seed(123)
cnv.col = sample(colors(distinct = TRUE), nlevels(cnv.info.overlap$Chrom))
names(cnv.col) = levels(cnv.info.overlap$Chrom)

for (i in names(result)) {
  # Samples belonging to the current tumor type
  ix = tumor.type == i

  # Significant genes (FDR < 0.05) split by the sign of the WGD estimate and
  # ordered from the largest to the smallest effect in each direction
  up = subset(result[[i]], WGD_Status_FDR < 0.05 & WGD_Status_Estimate > 0)
  up.o = up[order(up$WGD_Status_Estimate, decreasing = TRUE), ]
  down = subset(result[[i]], WGD_Status_FDR < 0.05 & WGD_Status_Estimate < 0)
  down.o = down[order(down$WGD_Status_Estimate, decreasing = FALSE), ]

  # Gene identifiers are taken from the first column of the result table;
  # head() on an empty table simply contributes nothing
  deg.list = c(
    head(as.character(up.o[, 1]), n.show),
    head(as.character(down.o[, 1]), n.show)
  )

  # Nothing to plot for this tumor type
  if (length(deg.list) == 0) {
    next
  }

  # Subset and scale data: center/scale each gene across samples, then clip
  # the scaled values to [-2, 2]
  deg.data = t(scale(t(expression.overlap[deg.list, ix])))
  deg.data[deg.data < -2] = -2
  deg.data[deg.data > 2] = 2

  # Column annotation: WGD status and purity of each sample
  column_ha = HeatmapAnnotation(
    df = absolute.overlap[ix, c("WGD_Status", "purity")],
    col = list(
      purity = purity_col,
      WGD_Status = c("Yes" = "black", "No" = "White")
    )
  )

  # Row annotation: chromosome of each gene (third column of the gene
  # annotation); drop = FALSE keeps a one-column data frame
  row_ha = rowAnnotation(
    df = cnv.info.overlap[rownames(deg.data), 3, drop = FALSE],
    col = list(Chrom = cnv.col)
  )

  # Emit a '## ' heading before each heatmap so every tumor type gets its own
  # section
  cat(paste0('## ', i, ' \n\n'))
  draw(Heatmap(deg.data,
               show_column_names = FALSE,
               top_annotation = column_ha,
               left_annotation = row_ha,
               row_names_gp = gpar(fontsize = 5),
               use_raster = TRUE))
  cat(' \n\n')
}