# filter expression matrix
mx <- read.table("04.counts/hcc_example.miRNA.homer.ct.mx",sep="\t",header=T)
# filter genes
filter_genes <- apply(
mx[,2:ncol(mx)],
1,
function(x) length(x[x > 2]) >= 2
)
mx_filterGenes <- mx[filter_genes,]
# load experimential design
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)
#-----------------------------------------
# basic script for normalizing with DESeq2
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, cbind, colMeans,
## colnames, colSums, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax,
## pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
## rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
## tapply, union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:base':
##
## apply
#Read Data in
countData <- mx_filterGenes
colData <- design
# generate Dataset
dds <- DESeqDataSetFromMatrix(countData, colData, design=~Treatment, tidy=TRUE)
## converting counts to integer mode
# normlize using rlog method
norm <- rlog(dds,blind=FALSE)
norm_matrix <- assay(norm)
norm_df <- data.frame(Gene=rownames(norm_matrix), norm_matrix)
if(!file.exists("05.diffexp/hcc_example.miRNA.homer.DESeq2.rlog.mx")){
write.table(norm_df, "05.diffexp/hcc_example.miRNA.homer.DESeq2.rlog.mx", row.names = FALSE,sep="\t")}
deg <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- results(deg,tidy=TRUE)
merged_res <- merge(norm_df,res,by.x="Gene",by.y="row")
if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv")){
write.table(merged_res,file="05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv",sep="\t",row.names=FALSE)
}
the codes use DESeq2 and produces files:
05.diffexp/hcc_example.miRNA.homer.DESeq2.rlog.mx
05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv
# 0.filter expression matrix
mx <- read.table("04.counts/hcc_example.miRNA.homer.ct.mx",sep="\t",header=T)
# filter genes
filter_genes <- apply(
mx[,2:ncol(mx)],
1,
function(x) length(x[x > 2]) >= 2
)
mx_filterGenes <- mx[filter_genes,]
# load experimential design
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)
#--------------------------------
# basic script for running edgeR
library(edgeR)
## Loading required package: limma
##
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
##
## plotMA
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#Read Data in
countData <- mx_filterGenes[,-1]
rownames(countData) <- mx_filterGenes[,1]
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)
colData <- design
# generate DGE object
y <- DGEList(countData, samples=colData, group=colData$Treatment)
y <- calcNormFactors(y)
#Estimate Error Model
design <-model.matrix(~Treatment, data=colData)
y <- estimateDisp(y, design)
# classic methods: compute p-values, then output
et <- exactTest(y)
res <- topTags(et,Inf)
tidyResult <- data.frame(Gene=rownames(res$table), res$table)
if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.classic.tsv")){
write.table(tidyResult,file="05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.classic.tsv",sep="\t",row.names=FALSE)}
# Generalized linear models
fit <- glmFit(y,design)
# likelihood ratio test
lrt <- glmLRT(fit,contrast = c(1,-1))
FDR <- p.adjust(lrt$table$PValue, method="BH")
padj_lrt <- cbind(lrt$table,FDR)
fit_df <- lrt$fitted.values
if(!file.exists("05.diffexp/hcc_example.miRNA.homer.edgeR.TMM.mx")){
write.table(fit_df,file = "05.diffexp/hcc_example.miRNA.homer.edgeR.TMM.mx",row.names = T, sep="\t", quote=F)}
merged_lrt <- merge(fit_df,padj_lrt,by="row.names")
colnames(merged_lrt)[1] <- "Genes"
if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.tsv")){
write.table(merged_lrt,file ="05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.tsv",row.names = F, sep="\t", quote=F)}
the codes use edgeR and produces files:
05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.classic.tsv
05.diffexp/hcc_example.miRNA.homer.edgeR.TMM.mx
05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.tsv
cpmMx <- read.table("04.counts/hcc_example.miRNA.homer.rpm.mx",sep="\t",header=T)
filter_cpm <- apply(
mx[,2:ncol(cpmMx)],
1,
function(x) length(x[x > 0]) >= 2
)
mx_filterCPM <- cpmMx[filter_cpm,]
myFun <- function(x){
x = as.numeric(x)
v1 = x[2:4]
v2 = x[5:10]
out <- wilcox.test(v1,v2,exact = F)
out <- out$p.value
}
p_value <- apply(mx_filterCPM,1,myFun)
p_value[is.nan(p_value)] <- 1
FDR <- p.adjust(p_value,method = "BH")
mx_filterCPM$avgNC <- apply(mx_filterCPM[,2:4],1,mean)
mx_filterCPM$avgHCC <- apply(mx_filterCPM[,5:10],1,mean)
mx_filterCPM$log2fc <- log2((mx_filterCPM$avgNC+0.25)/(mx_filterCPM$avgHCC+0.25))
results <- cbind(mx_filterCPM,p_value,FDR)
if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.wilcox__.tsv")){
write.table(results,file = "05.diffexp/hcc_example.miRNA.NCvsHCC.wilcox__.tsv",row.names = F, sep="\t", quote=F)}
the codes use wilconx test and produces files:
05.diffexp/hcc_example.miRNA.NCvsHCC.wilcox_.tsv
# filter expression matrix
mx <- read.table("04.counts/hcc_example.miRNA.homer.ct.mx",sep="\t",header=T)
# filter genes
filter_genes <- apply(
mx[,2:ncol(mx)],
1,
function(x) length(x[x > 2]) >= 2
)
mx_filterGenes <- mx[filter_genes,]
# load experimential design
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)
#-----------------------------------------
# basic script for normalizing with DESeq2
library(DESeq2)
#Read Data in
countData <- mx_filterGenes
colData <- design
# generate Dataset
dds <- DESeqDataSetFromMatrix(countData, colData, design=~Treatment, tidy=TRUE)
## converting counts to integer mode
# normlize using rlog mathod
norm <- rlog(dds,blind=FALSE)
norm_matrix <- assay(norm)
# diff-exp analysis using DESeq2
deg <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- results(deg,tidy=TRUE)
library("pheatmap")
select <- order(rowMeans(counts(deg,normalized=TRUE)), decreasing=TRUE)[1:25]
df <- as.data.frame(colData(deg)[,c("Treatment","Sample")])
pheatmap(assay(norm)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=TRUE, annotation_col=df)
plotPCA(norm, intgroup=c("Treatment"))
#### check distance between samples
sampleDists <- dist(t(assay(norm)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(norm$Treatment, norm$Sample, sep="-")
colnames(sampleDistMatrix) <- paste(norm$Treatment, norm$Sample, sep="-")
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)
#plotMA(deg, ylim=c(-5,5))
plotCounts(deg, gene=which.min(res$padj), intgroup="Treatment")
awk 'BEGIN{FS=OFS="\t"}($12>1 && $15<=0.05){print $1}' 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv > 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.NC_high.ids
awk 'BEGIN{FS=OFS="\t"}($12<-1 && $15<=0.05){print $1}' 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv > 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.HCC_high.ids
library('gplots')
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
mx <- read.table("05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.ids.rpkm.forPlot",sep="\t",head=T)
log2mx <- log2(mx+0.05)
#pdf(width=4,height=6)
heatmap.2(as.matrix(log2mx), Rowv=T, Colv=T, dendrogram = "both", trace = "none", density.info = c("none"), scale="row", cexCol=0.5)