添加 RNA-arry.R
添加RNA基因芯片数据分析模板 Signed-off-by: 生信分析 <bioinfo@baihub.cn>
This commit is contained in:
parent
f53e29348c
commit
c7b7f636a9
70
RNA-arry.R
Normal file
70
RNA-arry.R
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
## Written By LiShang
|
||||||
|
## Notice:适用于基因芯片平台
|
||||||
|
|
||||||
|
#加载依赖包
|
||||||
|
library(GEOquery)
|
||||||
|
library(limma)
|
||||||
|
library(ggplot2)
|
||||||
|
library(patchwork)
|
||||||
|
library(stringr)
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
################### Download and extract the data from GEO ###################

# Everything below uses paths relative to this working directory.
setwd("~/arry")

# Download the microarray series into the working directory. Only the first
# element of what getGEO() returns is kept.
gse <- getGEO(GEO = "GSE15852", destdir = ".")[[1L]]

# Sample annotation table and gene expression matrix of that element.
grp <- pData(gse)
exp <- exprs(gse)
|
||||||
|
|
||||||
|
### Expression matrix preprocessing
# Translate the row names of the expression matrix (probe IDs) into gene
# symbols. NOTE: as written, both options below run and the second assignment
# to `gene_symbols` overwrites the first; comment out the one you do not want.

# Option 1: take the annotation shipped with the GEO record. Quick, convenient
# and broadly applicable.
gene_symbols <- with(
  fData(gse)[, c("ID", "Gene Symbol")],
  setNames(`Gene Symbol`, ID)
)[rownames(exp)]

# Option 2: take the annotation from the chip's own R package. Less complete
# than the GEO annotation, but the gene names are shorter.
# hgu133a.db is a Bioconductor package, so install it with:
# BiocManager::install("hgu133a.db")
library(hgu133a.db)
gene_symbols <- with(
  toTable(hgu133aSYMBOL)[, c("probe_id", "symbol")],
  setNames(symbol, probe_id)
)[rownames(exp)]

# Collapse probes that share a gene symbol into one row by averaging them.
exp <- aggregate(exp, by = list(Group.1 = gene_symbols), FUN = mean)
# aggregate() puts the grouping key in the first column (`Group.1`): promote it
# to row names, then drop that column.
rownames(exp) <- exp[["Group.1"]]
exp <- exp[, -1L]
|
||||||
|
|
||||||
|
### Sample group preprocessing
# Label every sample as "normal" or "cancer" based on its pData title and
# store the result as a factor.

# Put the annotation rows in the same order as the expression matrix columns.
grp <- grp[colnames(exp), ]

# Titles containing "Normal" become "normal"; everything else is "cancer".
# "normal" is listed first so that it is the first factor level.
grp <- factor(
  ifelse(str_detect(grp$title, "Normal"), "normal", "cancer"),
  levels = c("normal", "cancer")
)
|
||||||
|
|
||||||
|
################## Sample quality control and normalization ##################

# Build a PC1-vs-PC2 scatter plot of the samples, coloured by group, with one
# 95% ellipse per group.
#   expr_mat: expression values with samples in columns (it is transposed
#             before prcomp(), so each sample becomes one point)
#   groups:   one group label per column of `expr_mat`
#   title:    plot title
# Returns the ggplot object; nothing is drawn until it is printed.
plot_pca <- function(expr_mat, groups, title) {
  pca_df <- as.data.frame(prcomp(t(expr_mat))$x)
  # The column is called `grp` so the legend title stays "grp".
  pca_df$grp <- groups
  ggplot(pca_df, aes(x = PC1, y = PC2, colour = grp)) +
    geom_point() +
    stat_ellipse(level = 0.95, show.legend = FALSE) +
    theme_bw() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    ) +
    ggtitle(title)
}

pca_plot1 <- plot_pca(exp, grp, "Before Normalize")

# NOTE(review): quantile normalization runs before the log2 step, so it (and
# the "before" PCA above) operates on whatever scale GEO delivered. limma's
# documentation appears to assume log-scale values for matrix input; consider
# log-transforming first. Order kept as-is so results do not change. TODO
# confirm.
exp <- normalizeBetweenArrays(exp, method = "quantile")

# Apply log2(x + 1) only when the largest value exceeds 50 (presumably a
# heuristic for data that are not yet log-scaled).
if (max(exp) > 50) {
  exp <- log2(exp + 1)
}

pca_plot2 <- plot_pca(exp, grp, "After Normalize")

# Combine both plots with patchwork; the legend of the first plot is hidden,
# leaving only the second plot's legend.
pca_plot1 + theme(legend.position = "none") + pca_plot2
|
||||||
|
|
||||||
|
###################### Differential expression analysis ######################

# Fit one linear model per row of `exp` against the group factor, then run
# limma's eBayes() on the fit.
fit <- eBayes(lmFit(exp, design = model.matrix(~ grp)))

# Table for the "grpcancer" coefficient with FDR-adjusted p-values;
# number = Inf keeps every row instead of only the top hits.
deg <- topTable(
  fit,
  coef = "grpcancer",
  number = Inf,
  adjust.method = "fdr"
)
|
Loading…
Reference in New Issue
Block a user