實驗數(shù)據(jù):
同一組織,分為兩組,control vs
treat,每組7例sample。數(shù)據(jù)第一列為基因名,后14列為對應(yīng)的count。
##bioconductor和edgeR包的安裝
source("http:///biocLite.R")
biocLite("edgeR")
library("limma")
library("edgeR")
##讀取數(shù)據(jù),方法隨意
rawdata<-read.delim("2.txt",header=T)
head(rawdata) #檢查讀入是否正確
y<-DGEList(counts=rawdata[,2:15],genes=rawdata[,1])
##過濾與標(biāo)準(zhǔn)化
left<-rowSums(cpm(y)>1)>=4 #過濾標(biāo)準(zhǔn)為至少one count per
million (cpm)
y<-y[left,]
y<-DGEList(counts=y$counts,genes=y$genes)
y<-calcNormFactors(y)#默認(rèn)為TMM標(biāo)準(zhǔn)化
##檢查樣本的outlier and relationship
y<-plotMDS(y)
##設(shè)計design matrix
group<-factor(c('H','H','H','H','H','H','H','M','M','M','M','M','M','M'))
design <- model.matrix(~group)
y<-DGEList(counts=rawdata[,2:15],genes=rawdata[,1])
##推測dispersion(離散度)
y<-estimateGLMCommonDisp(y,design,verbose=TRUE)
y<-estimateGLMTrendedDisp(y, design)
y<-estimateGLMTagwiseDisp(y, design)
##差異表達(dá)基因,to perform quasi-likelihood F-tests:
fit <- glmQLFit(y,design)
qlf <- glmQLFTest(fit,coef=2)
topTags(qlf)#前10個差異表達(dá)基因
##or 差異表達(dá)基因,to perform likelihood ratio tests:
fit<-glmFit(y, design)
lrt<-glmLRT(fit)
topTags(lrt)#前10個差異表達(dá)基因
##火山圖
summary(de<-decideTestsDGE(qlf))##qlf或可改為lrt
detags<-rownames(y)[as.logical(de)]
plotSmear(qlf, de.tags=detags)
abline(h=c(-4,4),col='blue') #藍(lán)線為2倍差異表達(dá)基因,差異表達(dá)的數(shù)據(jù)在qlf中
|
|