需要的数据:
- 基因表达矩阵:行是基因,列是细胞
- 细胞分组信息:一共一列,行名为细胞,第一列名字为
Cell.type,代表分组信息
依赖的R包
- GSEAbase
- limma
- dplyr
- tidyr
- GSVA
- pystr (github)
- purrr
- tibble
- pheatmap
代码
library(GSEABase)library(Biobase)library(genefilter)library(limma)library(RColorBrewer)library(GSVA)library(dplyr)library(pystr)##这里可以任意切换geneset,只要你有geneSets <- getGmt("/dellfsqd2/ST_LBI/USER/panxiaoguang/SingleCell/liangxue_data/cluster_1/GSVA/DataBase/c3.tft.v7.0.symbols.gmt")re_grp<-function(x,s){ purrr::map_chr(x,function(haha){if (haha==s){"A"}else{"B"}}) }###该函数x为分组编号,或者是细胞类型,或者是时间序列,cellinfo为细胞分组(行名为细胞,Cell.type为分组)###num为想要提取的top n基因或者pathway,依赖dplyr,limmacal_Diff<-function(x,cellInfo,res_es,num){ need<-cellInfo%>%transmute(new_group=re_grp(Cell.type,x))%>%.$new_group grouP<-as.factor(need) desigN<-model.matrix(~grouP+0) rownames(desigN)<-rownames(cellInfo) comparE<-makeContrasts(grouPA-grouPB,levels=desigN) fiT <- lmFit(res_es, desigN) fiT2 <- contrasts.fit(fiT, comparE) fiT3 <- eBayes(fiT2) Diff<-topTable(fiT3,p.value=0.05,coef=1,num=num) if (nrow(Diff)>0){ Diff$cluster<-x Diff<-Diff%>%tibble::rownames_to_column(var="Name")%>%tbl_df()%>%select(Name,logFC,t,P.Value,adj.P.Val,cluster) }else{ Diff<-tibble::tibble(Name=NA,logFC=NA,t=NA,P.Value=NA,adj.P.Val=NA,cluster=NA) }}###该函数mat输入表达矩阵,info输入分组信息(行名为细胞,Cell.type为分组信息),需要预先载入dplyrAverage_express<-function(mat,info){ new_res<-mat%>%tibble::rownames_to_column(var="Name")%>%tidyr::gather(cell,value,-Name) new_cellinfo<-info new_cellinfo<-tibble::rownames_to_column(new_cellinfo,var="cell") all<-left_join(new_res,new_cellinfo,by="cell") fuck<-all%>%group_by(Name,Cell.type)%>%summarise(Mean=mean(value))%>%tidyr::spread(Cell.type,Mean) fuck}for(i in c("PF","PO3","PO7","PO11")){ expMat<- read.table(pystr_format("cluster_1.exprMat_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE) cellInfo<-read.table(pystr_format("cluster_1.cellInfo_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE) mydata<-as.matrix(expMat) res_es <- gsva(mydata, geneSets, min.sz=10, max.sz=500, verbose=FALSE, parallel.sz=8) res_es<-as.data.frame(res_es) write.table(res_es,pystr_format("./cell_diff/cluster_1_{1}_c3.txt",i),sep="\t",quote=FALSE) ##差异分析 ##该函数用来分组,x为一个列表,s为输入的分组信息 ##计算差异基因集的合集 #fin<-unique(unlist(lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10))) ##这里要明确细胞分类的个数,多了会报错 fin<-do.call('rbind',lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10)) fin<-fin%>%filter(!is.na(cluster)) write.table(fin,pystr_format("./cell_diff/cluster_1_{1}_c3_diff_geneset.txt",i),sep="\t",quote=FALSE,row.names=FALSE) ###计算平均表达量矩阵 heihei2<-Average_express(res_es,cellInfo) heihei_need<-filter(heihei2,Name%in%unique(fin$Name)) houhou2<-tibble::column_to_rownames(heihei_need,var="Name") ###画热图 pheatmap::pheatmap(houhou2,file=pystr_format("./cell_diff/cluster_1_{1}_c3_diff_heat.pdf",i))}