【原】表觀調(diào)控13張圖之三。。。

健明 2021-07-14

展開全文

表觀調(diào)控13張圖之一證明基因干擾有效性
表觀調(diào)控13張圖之二相關(guān)性熱圖看不同樣本相關(guān)性

現(xiàn)在我們再解讀一下第三張圖，如果你對視頻感興趣，還是可以繼續(xù)留郵箱，我們在圣誕節(jié)（明天）統(tǒng)一發(fā)郵件給大家全部的視頻云盤鏈接和配套代碼哈！

本章節(jié)我們的視頻審查員（劉博-二貨潛）將繼續(xù)帶領(lǐng)大家學習視頻，并且復現(xiàn)附件中Figure S13的一張圖，如下：

本文的目錄如下：

DESeq2 尋找差異基因
可視化

(1)MAplot：圖a和圖b

(2)差異基因韋恩圖:UpSetR/VennDiagram

(3)兩樣本 log2FC 相關(guān)性散點圖

DESeq2尋找差異基因

萬事開頭前先看包的說明書：DESeq2 manual ，里面是講的很詳細很全面的，所以下面與文章不相關(guān)的圖就不展示出來了。

rm(list = ls())
options(stringsAsFactors = F)
a = read.table('../figure-01-check-gene-expression/all.counts.id.txt', header = T)
dim(a)
dat = a[,7:16]

# 第一列為基因名，將其賦值給行名, 做韋恩圖需要
rownames(dat)=a[, 1]

# 查看前四行和前四列信息
dat[1:4, 1:4]
library(stringr)

# 要擅用 str_split 切割字符串，表示按照下劃線 "_" 對列名進行切割，取第一列；即樣本名
# 三組，每個樣品一組，即 PhoKO、SppsKO、WT
group_list = str_split(colnames(dat), '_', simplify = T)[, 1] 
table(group_list)

######################################################################
###################      Firstly for DEseq2      #####################
######################################################################
# 一步運行
# 這里我們主要是得到差異基因中間部分就不細說
if(T){
  # 賦值表達矩陣和分組信息為一個新的變量，方便以后直接調(diào)用
  exprSet = dat
  suppressMessages(library(DESeq2)) # 加載包

  (colData <- data.frame(row.names = colnames(exprSet), 
                         group_list = group_list) )

  # 構(gòu)建一個表達矩陣
  dds <- DESeqDataSetFromMatrix(countData = exprSet,
                                colData = colData,
                                design = ~ group_list)
  dds <- DESeq(dds)
  png("qc_dispersions.png", 1000, 1000, pointsize = 20)
  plotDispEsts(dds, main="Dispersion plot")
  dev.off()


  rld <- rlogTransformation(dds)
  exprMatrix_rlog = assay(rld) 
  #write.csv(exprMatrix_rlog,'exprMatrix.rlog.csv' )

  normalizedCounts1 <- t( t(counts(dds)) / sizeFactors(dds) )
  # normalizedCounts2 <- counts(dds, normalized=T) # it's the same for the tpm value
  # we also can try cpm or rpkm from edgeR pacage
  exprMatrix_rpm = as.data.frame(normalizedCounts1) 
  head(exprMatrix_rpm)
  #write.csv(exprMatrix_rpm,'exprMatrix.rpm.csv' )

  png("DEseq_RAWvsNORM.png", height = 800, width = 800)
  par(cex = 0.7)
  n.sample = ncol(exprSet)
  if(n.sample > 40) par(cex = 0.5)
  cols <- rainbow(n.sample*1.2)
  par(mfrow = c(2, 2))
  boxplot(exprSet, col = cols,main = "expression value", las = 2)
  boxplot(exprMatrix_rlog, col = cols, main = "expression value", las = 2)
  hist(as.matrix(exprSet))
  hist(exprMatrix_rlog)
  dev.off()

  library(RColorBrewer)
  (mycols <- brewer.pal(8, "Dark2")[1:length(unique(group_list))])
  cor(as.matrix(exprSet))
  # Sample distance heatmap
  sampleDists <- as.matrix(dist(t(exprMatrix_rlog)))
  #install.packages("gplots",repos = "http://cran.us.")
  library(gplots)

  png("qc-heatmap-samples.png", w = 1000, h = 1000, pointsize = 20)
  heatmap.2(as.matrix(sampleDists), key = F, trace = "none",
            col = colorpanel(100, "black", "white"),
            ColSideColors = mycols[group_list], RowSideColors = mycols[group_list],
            margin = c(10, 10), main="Sample Distance Matrix")
  dev.off()

  cor(exprMatrix_rlog) 

  table(group_list)
  res <- results(dds, 
                 contrast=c("group_list", "SppsKO", "WT"))
  resOrdered <- res[order(res$padj),]
  head(resOrdered)
  DEG_SppsKO = as.data.frame(resOrdered)
  DEG_SppsKO = na.omit(DEG_SppsKO)

  table(group_list)
  res <- results(dds, 
                 contrast = c("group_list","PhoKO","WT"))
  resOrdered <- res[order(res$padj),]
  head(resOrdered)
  DEG_PhoKO = as.data.frame(resOrdered)
  DEG_PhoKO = na.omit(DEG_PhoKO)
  save(DEG_PhoKO, DEG_SppsKO,file = 'deg_output.Rdata')
}

簡化一下，如果不需要中間的信息，我們只需要差異基因的話，那么只需要運行以下代碼：

if(T){
  # 賦值表達矩陣和分組信息為一個新的變量，方便以后直接調(diào)用
  exprSet = dat
  suppressMessages(library(DESeq2)) # 加載包

  (colData <- data.frame(row.names = colnames(exprSet), 
                         group_list = group_list) )

  # 構(gòu)建一個表達矩陣
  dds <- DESeqDataSetFromMatrix(countData = exprSet,
                                colData = colData,
                                design = ~ group_list)
  dds <- DESeq(dds)

# 下面我們得到 Spps 突變后的差異基因    
  res <- results(dds, 
                 contrast=c("group_list", "SppsKO", "WT")) 
# 注意這里是前面比后面，別把順序搞反了，到時候一不注意結(jié)果就是反的。所以拿差異倍數(shù)對著原始 reads 比較一下。

  resOrdered <- res[order(res$padj),] # 按照 padj 值進行排序
  head(resOrdered)
  DEG_SppsKO = as.data.frame(resOrdered) # 將數(shù)據(jù)轉(zhuǎn)變?yōu)?nbsp;data.frame 數(shù)據(jù)框
  DEG_SppsKO = na.omit(DEG_SppsKO) # 去除包含 NA 值的行

# 下面我們得到 Pho 突變后的差異基因 
  table(group_list)
  res <- results(dds, 
                 contrast = c("group_list","PhoKO","WT"))
  resOrdered <- res[order(res$padj),]
  head(resOrdered)
  DEG_PhoKO = as.data.frame(resOrdered)
  DEG_PhoKO = na.omit(DEG_PhoKO)

# 將關(guān)鍵結(jié)果以 Rdata 形式保存到本地，下次如有需要就不需要重新用 DESeq2 計算了    
  save(DEG_PhoKO, DEG_SppsKO,file = 'deg_output.Rdata')
}

好了，上面我們就得到了差異基因。

可視化

MAplot: 圖 a 和圖 b

什么是 MAplot ?DESeq2 包說明書中的一段話：

In DESeq2, the function plotMA shows the log₂ fold changes attributable to a given variable over the mean of normalized counts for all the samples in the DESeqDataSet. Points will be colored red if the adjusted p value is less than 0.1. Points which fall out of the window are plotted as open triangles pointing either up or down.

也就是說表示的是變化倍數(shù) log₂(Fold change) 與所有樣本標準化后的 counts 數(shù)的平均值之間的關(guān)系。那么怎么畫呢？如果看過 DESeq2 說明書就知道這是 MA-plot 部分。由于我們這里是將三組都放在一個 dds 中，所以我們得分別挑出 Pho 和 Spps 進行可視化。

首先查看 dds 中的分組情況：

resultsNames(dds)

[1] "Intercept"                  "group_list_SppsKO_vs_PhoKO" "group_list_WT_vs_PhoKO"

lfcShrink 收縮 FC 三種方法如下（這里直接放原文）：

normal is the the original DESeq2 shrinkage estimator, an adaptive Normal distribution as prior. This is currently the default, although the default will likely change to apeglm in the October 2018 release given apeglm’s superior performance.
apeglm is the adaptive t prior shrinkage estimator from the apeglm package (Zhu, Ibrahim, and Love 2018).
ashr is the adaptive shrinkage estimator from the ashr package (Stephens 2016). Here DESeq2 uses the ashr option to fit a mixture of Normal distributions to form the prior, with method="shrinkage".

繪制 Spps

# apeglm 方法需要安裝 apeglm 包
# BiocManager::install("apeglm")

# ashr 方法同樣需要安裝額外依賴的包
# BiocManager::install("ashr")

Spps_resLFC <- lfcShrink(dds, coef  = "group_list_SppsKO_vs_PhoKO", type = "apeglm")
Spps_resNorm <- lfcShrink(dds, coef = "group_list_SppsKO_vs_PhoKO", type = "normal")
Spps_resAsh <- lfcShrink(dds, coef  = "group_list_SppsKO_vs_PhoKO", type = "ashr")

par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-3,3)

plotMA(Spps_resLFC, xlim  = xlim, ylim = ylim, main = "apeglm")
plotMA(Spps_resNorm, xlim = xlim, ylim = ylim, main = "normal")
plotMA(Spps_resAsh, xlim  = xlim, ylim = ylim, main = "ashr")

dev.off()

繪制 Pho

Pho_resLFC <- lfcShrink(dds, coef  = "group_list_WT_vs_PhoKO", type = "apeglm")
Pho_resNorm <- lfcShrink(dds, coef = "group_list_WT_vs_PhoKO", type = "normal")
Pho_resAsh <- lfcShrink(dds, coef  = "group_list_WT_vs_PhoKO", type = "ashr")

par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-3,3)

plotMA(Pho_resLFC, xlim  = xlim, ylim = ylim, main = "apeglm")
plotMA(Pho_resNorm, xlim = xlim, ylim = ylim, main = "normal")
plotMA(Pho_resAsh, xlim  = xlim, ylim = ylim, main = "ashr")
dev.off()

好了，我們選取 normal （開心就好，你選哪個），來繪制圖 a 和 b

par(mfrow=c(1,2), mar=c(4,4,2,1))
plotMA(Spps_resNorm, xlim = xlim, ylim = ylim, main = "Spps_normal")
plotMA(Pho_resNorm, xlim = xlim, ylim = ylim, main = "Pho_normal")
dev.off()

差異基因韋恩圖:UpSetR/VennDiagram

我們下面將用兩種方式來展示交集

第一種：我們使用 R 包 UpSetR 來繪制差異基因之間的韋恩圖（多組時候，這種更加一目了然）

library(UpSetR)
load(file = 'deg_output.Rdata')

colnames(DEG_PhoKO)
DEG_PhoKO$change = ifelse(DEG_PhoKO$padj > 0.05,'stable',
                          ifelse(DEG_PhoKO$log2FoldChange > 0, 'up', 'down'))

DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05, 'stable',
                         ifelse(DEG_SppsKO$log2FoldChange > 0, 'up', 'down'))

SppsKO_up   = rownames(DEG_SppsKO[DEG_SppsKO$change == 'up',])
SppsKO_down = rownames(DEG_SppsKO[DEG_SppsKO$change == 'down',])
PhoKO_up    = rownames(DEG_PhoKO[DEG_PhoKO$change == 'up',])
PhoKO_down  = rownames(DEG_PhoKO[DEG_PhoKO$change == 'down',])

allG = unique(c(SppsKO_up, SppsKO_down, PhoKO_up, PhoKO_down))

df = data.frame(allG = allG,
              SppsKO_up   = as.numeric(allG %in% SppsKO_up),
              SppsKO_down = as.numeric(allG %in% SppsKO_down),
              PhoKO_up    = as.numeric(allG %in% PhoKO_up),
              PhoKO_down  = as.numeric(allG %in% PhoKO_down))

upset(df)

第二種：我們使用 VennDiagram來展示，也是就是文中那種圖

# 這里直接 copy 琪同學的
# 鏈接: https://mp.weixin.qq.com/s/Pg0mjz7mD73atMnHz7jv1A

# 導入本地字體，不然 `Arial` 字體會警告
library("extrafont")
font_import()
loadfonts()

load(file = 'deg_output.Rdata')

colnames(DEG_PhoKO)
DEG_PhoKO$change = ifelse(DEG_PhoKO$padj > 0.05,'stable',
                          ifelse(DEG_PhoKO$log2FoldChange > 0, 'up', 'down'))

DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05, 'stable',
                         ifelse(DEG_SppsKO$log2FoldChange > 0, 'up', 'down'))

SppsKO_up   = rownames(DEG_SppsKO[DEG_SppsKO$change == 'up',])
SppsKO_down = rownames(DEG_SppsKO[DEG_SppsKO$change == 'down',])
PhoKO_up    = rownames(DEG_PhoKO[DEG_PhoKO$change == 'up',])
PhoKO_down  = rownames(DEG_PhoKO[DEG_PhoKO$change == 'down',])

library(VennDiagram)
venn.diagram(
  x = list(
    "Up in phoKO"    = PhoKO_up,
    "Down in phoKO"  = PhoKO_down,
    "Up in SppsKO"   = SppsKO_up,
    "Down in SppsKO" = SppsKO_down
    ),
  filename       = "Venn.png", # 保存到當前目錄
  # stroke 顏色 形式 粗細
  col            = "#000000", lwd = 3, lty = 1,
  # 填充 透明度
  # 顏色可以參考前一篇，使用 takecolor 自己取色
  fill           = c("#D3E7F0", "#9FBEDB", "#A0D191", "#6DAE8A"),
  alpha          = 0.50,
  # 里外字體大小形式
  cex            = 1.5,
  fontfamily     = "Arial",
  fontface       = "bold",
  cat.cex        = 1.5,
  cat.dist       = 0.2,
  cat.fontfamily = "Arial",
  # 圖像整體位置 分辨率
  margin         = 0.2,
  resolution     = 300)

與文章趨勢基本一致?？梢钥闯?Spps 和 Pho 共同調(diào)控許多基因，說明這兩基因有一定的關(guān)系。

兩樣本 log2FC 相關(guān)性散點圖

load(file = 'deg_output.Rdata')
library(ggpubr)
DEG_PhoKO = DEG_PhoKO[rownames(DEG_SppsKO),]
po = data.frame(SppsKO = DEG_SppsKO$log2FoldChange,
              PhoKO = DEG_PhoKO$log2FoldChange)

sp <- ggscatter(po, 'SppsKO', 'PhoKO',
                add        = "reg.line",  # Add regressin line
                add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
                conf.int   = TRUE # Add confidence interval
)
# Add correlation coefficient
sp + stat_cor(method  = "pearson", 
              label.x = -10, label.y = 10) # 相關(guān)系數(shù)顯示位置