1 Set up package environment

We first want to install all of the packages used in this analysis. We use the package renv as a way to manage package versions. The following code was initially run to set up the package library.

# One-time setup of the project package library. renv is used so that the
# package versions used for the analysis can be recorded in 'renv.lock'.
install.packages("renv")
library(renv)

# Initialise renv with a bare project library, then install the packages
# needed by the analysis into it.
renv::init(bare = TRUE)
install.packages("BiocManager")
library(BiocManager)

# BiocManager::repositories() is passed as the repository list. The argument
# is spelled out in full as `repos` instead of relying on partial argument
# matching of `repo`.
install.packages(c("ggplot2", "magrittr", "knitr", "kableExtra", "ggrepel",
                   "stringr", "circlize", "ComplexHeatmap", "metap", "limma",
                   "devtools", "rmarkdown", "formatR", "bitops", "caTools"),
                 repos = BiocManager::repositories())

# hypeR is installed from its GitHub repository.
devtools::install_github("montilab/hypeR")

# Record the installed package versions in the lockfile.
renv::snapshot()

If you are attempting to reproduce this analysis, you can 1) install and load renv, 2) copy the renv.lock file into your current working directory, and 3) run renv::restore() to automatically install all of the packages at the same versions.

# The 'renv.lock' file must already be in the current working directory
# before the package library is restored from it.
install.packages(pkgs = "renv")
library("renv")
renv::restore()

2 Load in scripts and R packages

First, R packages will be loaded and scripts will be sourced.

# Project helper scripts (sourced before the packages are attached).
source(file = "lm_utils.R")
source(file = "lm_expression_with_cnv.R")

# Packages used throughout the report.
library("ggplot2")
library("magrittr")
library("knitr")
library("kableExtra")
library("ggrepel")
library("hypeR")
library("stringr")
library("circlize")
library("ComplexHeatmap")
library("metap")
library("limma")

# Default figure size for every chunk in the report.
knitr::opts_chunk$set(fig.width = 12, fig.height = 8)

# Output directories; warnings are suppressed so that re-running the report
# when the directories already exist stays quiet.
invisible(lapply(c("Enrichment_Results", "LM_Results"),
                 dir.create, showWarnings = FALSE))

We need to read in the expression matrix, ABSOLUTE calls, and copy ratio matrix. The expression matrix was obtained from the following URL and then gzipped:

http://api.gdc.cancer.gov/data/3586c0da-64d0-4b74-a449-5ff4d9136611

The ABSOLUTE purity/ploidy estimates were obtained from:

http://api.gdc.cancer.gov/data/4f277128-f793-4354-a13d-30cc7fe9f6b5

The copy ratio matrix was obtained by running GISTIC2.0 on the seg file obtained from:

http://api.gdc.cancer.gov/data/00a32f7a-c85f-4f86-850d-be53973cbc4d

Clinical data with tumor type information was obtained from:

http://api.gdc.cancer.gov/data/0fc78496-818b-4896-bd83-52db1f533c5c

# ---- RNA-seq expression ----------------------------------------------------
expression = read.table(
  gzfile("../../Data/EBPlusPlusAdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.tsv.gz"),
  header = TRUE, sep = "\t", row.names = 1,
  stringsAsFactors = FALSE, check.names = FALSE
)
# Row names are split on '|'; the first field is used as the gene name.
expression.gene.name = t(as.data.frame(strsplit(rownames(expression), "\\|")))
# Rows named '?' or "SLC35E2" are dropped before the gene names become row names.
i = expression.gene.name[, 1] %in% c("?", "SLC35E2")
expression = log2(expression[!i, ] + 1)
rownames(expression) = expression.gene.name[!i, 1]
# Keep only the first 15 characters of each sample barcode.
colnames(expression) = substr(colnames(expression), 1, 15)

# ---- ABSOLUTE purity/ploidy calls ------------------------------------------
absolute = read.table(
  "../../../Mutation/Data/TCGA_mastercalls.abs_tables_JSedit.fixed.txt",
  header = TRUE, sep = "\t", row.names = 1, stringsAsFactors = FALSE
)
# WGD status is "Yes" when at least one genome doubling was called.
absolute$WGD_Status = factor(
  ifelse(absolute$Genome.doublings > 0, "Yes", "No"),
  levels = c("No", "Yes")
)

# ---- GISTIC copy-number ratios ---------------------------------------------
cnv = read.table(
  gzfile("../../Data/all_data_by_genes_whitelisted.tsv.gz"),
  header = TRUE, sep = "\t", row.names = 1,
  stringsAsFactors = FALSE, check.names = FALSE
)
# The first two columns are gene-level annotation; the rest is the matrix.
cnv.info = cnv[, 1:2]
cnv = as.matrix(cnv[, -(1:2)])
colnames(cnv) = substr(colnames(cnv), 1, 15)
# Chromosome = leading word characters before the first p/q arm letter of the
# second annotation column.
chrom = str_match(cnv.info[, 2], "^(\\w+)[pq]")
cnv.info$Chrom = factor(chrom[, 2], levels = c(1:22, "X", "Y"))

# ---- Clinical annotation (tumor type) --------------------------------------
clin = read.table(
  gzfile("../../Data/clinical_PANCAN_patient_with_followup.tsv.gz"),
  header = TRUE, sep = "\t", row.names = 2,
  stringsAsFactors = FALSE, quote = ""
)
clin = clin[!is.na(clin[["acronym"]]), ]

# ---- Sample overlap --------------------------------------------------------
# Samples present in all three assays
tumor.overlap = Reduce(
  intersect,
  list(colnames(expression), rownames(absolute), colnames(cnv))
)

# Subset to primary tumors (sample-type code "01" at barcode positions 14-15)
tumor.overlap = tumor.overlap[substr(tumor.overlap, 14, 15) == "01"]

# Subset to tumors with an annotated tumor type
tumor.overlap = tumor.overlap[substr(tumor.overlap, 1, 12) %in% rownames(clin)]

# ---- Gene overlap ----------------------------------------------------------
# Genes shared by the expression and CNV assays, keeping only genes that are
# missing or zero in fewer than 90% of the selected samples.
gene.overlap.initial = intersect(rownames(expression), rownames(cnv))
percent.absent = rowSums(
  is.na(expression[, tumor.overlap]) | expression[, tumor.overlap] == 0
) / length(tumor.overlap)
gene.overlap = intersect(
  rownames(expression)[percent.absent < 0.90],
  rownames(cnv)
)

# ---- Subset every assay to the shared genes and samples --------------------
cnv.overlap = cnv[gene.overlap, tumor.overlap]
expression.overlap = expression[gene.overlap, tumor.overlap]
absolute.overlap = absolute[tumor.overlap, ]
cnv.info.overlap = cnv.info[gene.overlap, ]
tumor.type = clin[substr(colnames(expression.overlap), 1, 12), "acronym"]

# The full matrices are no longer needed.
rm(expression, cnv)

3 Association of expression with WGD status

We will iterate through all tumor types and identify genes that are differentially expressed with respect to WGD status while controlling for purity and local copy number.

# Run lm_expression_with_cnv() once per tumor type. Each result is written to
# a gzipped, tab-delimited file under LM_Results/ and also kept in `result`,
# a list keyed by tumor type.
tumor.type.ids = unique(tumor.type)
result = list()
for (i in tumor.type.ids) {
  cat(paste("Analyzing", i, "...\n"))

  # Samples of the current tumor type and their covariates
  ix = tumor.type == i
  covars = absolute.overlap[ix, c("WGD_Status", "purity")]

  temp.res = lm_expression_with_cnv(
    expression.overlap[, ix],
    cnv.overlap[, ix],
    covars
  )

  # Save to file
  fn = file.path(
    "LM_Results",
    paste0("LM_Exp_tilde_CNV_WGD_Purity_", i, ".txt.gz")
  )
  write.table(temp.res, file = gzfile(fn),
              sep = "\t", quote = FALSE, row.names = FALSE)

  result[[i]] = temp.res
}
## Analyzing ACC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing BLCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing DLBC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing UCEC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing HNSC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing PRAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing KIRP ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing PAAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing SARC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing CESC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing COAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LUSC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing READ ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing KIRC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LIHC ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing BRCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing UCS ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing GBM ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing KICH ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing THCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LGG ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing LUAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing MESO ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing PCPG ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing TGCT ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing UVM ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing THYM ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing CHOL ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing OV ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing ESCA ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed
## Analyzing STAD ...
## Analysis of 1000 genes completed
## Analysis of 2000 genes completed
## Analysis of 3000 genes completed
## Analysis of 4000 genes completed
## Analysis of 5000 genes completed
## Analysis of 6000 genes completed
## Analysis of 7000 genes completed
## Analysis of 8000 genes completed
## Analysis of 9000 genes completed
## Analysis of 10000 genes completed
## Analysis of 11000 genes completed
## Analysis of 12000 genes completed
## Analysis of 13000 genes completed
## Analysis of 14000 genes completed
## Analysis of 15000 genes completed
## Analysis of 16000 genes completed
## Analysis of 17000 genes completed

4 Generate Heatmaps

A heatmap will be generated for the top 50 up- and down-regulated genes using the ComplexHeatmap package. Each tumor type is plotted on a separate tab.

# Maximum number of up-regulated and of down-regulated genes shown per tumor
# type.
n.show = 50

# Set up column color bars: purity is mapped onto a continuous color ramp
# with break points at 0, 0.25, 0.5, 0.75 and 1.
purity_col = colorRamp2(c(0, 25, 50, 75, 100) / 100,
                        c("blue", "green", "yellow", "orange", "red"))

# Set up row color bars: one color per chromosome level. The seed makes the
# random draw reproducible, and the number of colors is taken from the factor
# itself so it always matches the names assigned below.
set.seed(123)
cnv.col = sample(colors(distinct = TRUE), nlevels(cnv.info.overlap$Chrom))
names(cnv.col) = levels(cnv.info.overlap$Chrom)

for (i in names(result)) {
  ix = tumor.type == i
  deg.list = c()

  # Significant (FDR < 0.05) genes with a positive WGD estimate, largest first.
  # The first column of the result table is used as the gene identifier.
  up = subset(result[[i]], WGD_Status_FDR < 0.05 & WGD_Status_Estimate > 0)
  if (nrow(up) > 0) {
    up.o = up[order(up$WGD_Status_Estimate, decreasing = TRUE), ]
    deg.list = c(deg.list, head(as.character(up.o[, 1]), n.show))
  }

  # Significant genes with a negative WGD estimate, most negative first.
  down = subset(result[[i]], WGD_Status_FDR < 0.05 & WGD_Status_Estimate < 0)
  if (nrow(down) > 0) {
    down.o = down[order(down$WGD_Status_Estimate, decreasing = FALSE), ]
    deg.list = c(deg.list, head(as.character(down.o[, 1]), n.show))
  }

  # Nothing significant for this tumor type: skip it.
  if (length(deg.list) == 0) {
    next
  }

  # Subset and scale data: z-score each gene across the samples of this tumor
  # type, then clip to [-2, 2] so extreme values do not dominate the colors.
  deg.data = t(scale(t(expression.overlap[deg.list, ix])))
  deg.data[deg.data < -2] = -2
  deg.data[deg.data > 2] = 2

  column_ha = HeatmapAnnotation(
    df = absolute.overlap[ix, c("WGD_Status", "purity")],
    col = list(purity = purity_col,
               WGD_Status = c("Yes" = "black", "No" = "White"))
  )

  # drop = FALSE keeps the single annotation column as a data frame.
  row_ha = rowAnnotation(
    df = cnv.info.overlap[rownames(deg.data), 3, drop = FALSE],
    col = list(Chrom = cnv.col)
  )

  # Emit a markdown heading so each tumor type is rendered in its own tab.
  cat(paste0('## ', i, ' \n\n'))
  draw(Heatmap(deg.data,
               show_column_names = FALSE,
               top_annotation = column_ha,
               left_annotation = row_ha,
               row_names_gp = gpar(fontsize = 5),
               use_raster = TRUE))
  cat(' \n\n')
}

4.1 ACC