expression

DESeq2

# filter expression matrix
mx <- read.table("04.counts/hcc_example.miRNA.homer.ct.mx",sep="\t",header=T)

# filter genes
filter_genes <- apply(
  mx[,2:ncol(mx)],
  1,
  function(x) length(x[x > 2]) >= 2
)

mx_filterGenes <- mx[filter_genes,]

# load experimential design
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)

#-----------------------------------------
# basic script for normalizing with DESeq2
library(DESeq2)

## Loading required package: S4Vectors

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colMeans,
##     colnames, colSums, do.call, duplicated, eval, evalq, Filter,
##     Find, get, grep, grepl, intersect, is.unsorted, lapply,
##     lengths, Map, mapply, match, mget, order, paste, pmax,
##     pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce,
##     rowMeans, rownames, rowSums, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:base':
## 
##     expand.grid

## Loading required package: IRanges

## Loading required package: GenomicRanges

## Loading required package: GenomeInfoDb

## Loading required package: SummarizedExperiment

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: DelayedArray

## Loading required package: matrixStats

## 
## Attaching package: 'matrixStats'

## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians

## 
## Attaching package: 'DelayedArray'

## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges

## The following object is masked from 'package:base':
## 
##     apply

#Read Data in
countData <- mx_filterGenes
colData <- design

# generate Dataset
dds <- DESeqDataSetFromMatrix(countData, colData, design=~Treatment, tidy=TRUE)

## converting counts to integer mode

# normlize using rlog method
norm <- rlog(dds,blind=FALSE)
norm_matrix <- assay(norm)
norm_df <- data.frame(Gene=rownames(norm_matrix), norm_matrix)

if(!file.exists("05.diffexp/hcc_example.miRNA.homer.DESeq2.rlog.mx")){
write.table(norm_df, "05.diffexp/hcc_example.miRNA.homer.DESeq2.rlog.mx", row.names = FALSE,sep="\t")}

deg <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

res <- results(deg,tidy=TRUE)
merged_res <- merge(norm_df,res,by.x="Gene",by.y="row")

if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv")){
write.table(merged_res,file="05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv",sep="\t",row.names=FALSE)
}

the codes use DESeq2 and produces files:

05.diffexp/hcc_example.miRNA.homer.DESeq2.rlog.mx

05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv

edgeR

# 0.filter expression matrix
mx <- read.table("04.counts/hcc_example.miRNA.homer.ct.mx",sep="\t",header=T)

# filter genes
filter_genes <- apply(
    mx[,2:ncol(mx)],
    1,
    function(x) length(x[x > 2]) >= 2
)

mx_filterGenes <- mx[filter_genes,]

# load experimential design
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)

#--------------------------------
# basic script for running edgeR
library(edgeR)

## Loading required package: limma

## 
## Attaching package: 'limma'

## The following object is masked from 'package:DESeq2':
## 
##     plotMA

## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA

#Read Data in
countData <- mx_filterGenes[,-1]
rownames(countData) <- mx_filterGenes[,1]
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)
colData <- design

# generate DGE object
y <- DGEList(countData, samples=colData, group=colData$Treatment)
y <- calcNormFactors(y)

#Estimate Error Model
design <-model.matrix(~Treatment, data=colData)
y <- estimateDisp(y, design)

# classic methods: compute p-values, then output
et <- exactTest(y)
res <- topTags(et,Inf)
tidyResult <- data.frame(Gene=rownames(res$table), res$table)

if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.classic.tsv")){
write.table(tidyResult,file="05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.classic.tsv",sep="\t",row.names=FALSE)}

# Generalized linear models
fit <- glmFit(y,design)
# likelihood ratio test
lrt <- glmLRT(fit,contrast = c(1,-1))
FDR <- p.adjust(lrt$table$PValue, method="BH")
padj_lrt <- cbind(lrt$table,FDR)
fit_df <- lrt$fitted.values
if(!file.exists("05.diffexp/hcc_example.miRNA.homer.edgeR.TMM.mx")){
write.table(fit_df,file = "05.diffexp/hcc_example.miRNA.homer.edgeR.TMM.mx",row.names = T, sep="\t", quote=F)}
merged_lrt <- merge(fit_df,padj_lrt,by="row.names")
colnames(merged_lrt)[1] <- "Genes"

if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.tsv")){
write.table(merged_lrt,file ="05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.tsv",row.names = F, sep="\t", quote=F)}

the codes use edgeR and produces files:

05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.classic.tsv

05.diffexp/hcc_example.miRNA.homer.edgeR.TMM.mx

05.diffexp/hcc_example.miRNA.NCvsHCC.edgeR.tsv

Wilcox/Mann-Whitney-U Test

1. normalize the reads by library size (edgeR)

2. identify differential expressed gene using wilcoxon.test()

cpmMx <- read.table("04.counts/hcc_example.miRNA.homer.rpm.mx",sep="\t",header=T)
filter_cpm <- apply(
    mx[,2:ncol(cpmMx)],
    1,
    function(x) length(x[x > 0]) >= 2
)
mx_filterCPM <- cpmMx[filter_cpm,]

myFun <- function(x){
  x = as.numeric(x)
  v1 = x[2:4]
  v2 = x[5:10]
  out <- wilcox.test(v1,v2,exact = F) 
  out <- out$p.value
}
p_value <- apply(mx_filterCPM,1,myFun)
p_value[is.nan(p_value)] <- 1
FDR <- p.adjust(p_value,method = "BH")
mx_filterCPM$avgNC <- apply(mx_filterCPM[,2:4],1,mean)
mx_filterCPM$avgHCC <- apply(mx_filterCPM[,5:10],1,mean)
mx_filterCPM$log2fc <- log2((mx_filterCPM$avgNC+0.25)/(mx_filterCPM$avgHCC+0.25))
results <- cbind(mx_filterCPM,p_value,FDR)
if(!file.exists("05.diffexp/hcc_example.miRNA.NCvsHCC.wilcox__.tsv")){
write.table(results,file = "05.diffexp/hcc_example.miRNA.NCvsHCC.wilcox__.tsv",row.names = F, sep="\t", quote=F)}

the codes use wilconx test and produces files:

05.diffexp/hcc_example.miRNA.NCvsHCC.wilcox_.tsv

exploring results

# filter expression matrix
mx <- read.table("04.counts/hcc_example.miRNA.homer.ct.mx",sep="\t",header=T)

# filter genes
filter_genes <- apply(
  mx[,2:ncol(mx)],
  1,
  function(x) length(x[x > 2]) >= 2
)

mx_filterGenes <- mx[filter_genes,]

# load experimential design
design <- read.table("05.diffexp/design.tsv",sep="\t",header=T)

#-----------------------------------------
# basic script for normalizing with DESeq2
library(DESeq2)
#Read Data in
countData <- mx_filterGenes
colData <- design

# generate Dataset
dds <- DESeqDataSetFromMatrix(countData, colData, design=~Treatment, tidy=TRUE)

## converting counts to integer mode

# normlize using rlog mathod
norm <- rlog(dds,blind=FALSE)
norm_matrix <- assay(norm)

# diff-exp analysis using DESeq2
deg <- DESeq(dds)

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

res <- results(deg,tidy=TRUE)

Heatmap for count matrix

library("pheatmap")
select <- order(rowMeans(counts(deg,normalized=TRUE)), decreasing=TRUE)[1:25]
df <- as.data.frame(colData(deg)[,c("Treatment","Sample")])
pheatmap(assay(norm)[select,], cluster_rows=FALSE, show_rownames=FALSE, cluster_cols=TRUE, annotation_col=df)

PCA

plotPCA(norm, intgroup=c("Treatment"))

#### check distance between samples

sampleDists <- dist(t(assay(norm)))
library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(norm$Treatment, norm$Sample, sep="-")
colnames(sampleDistMatrix) <- paste(norm$Treatment, norm$Sample, sep="-")
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows=sampleDists,
         clustering_distance_cols=sampleDists,
         col=colors)

MA-plot

#plotMA(deg, ylim=c(-5,5))

plotCounts(deg, gene=which.min(res$padj), intgroup="Treatment")

awk 'BEGIN{FS=OFS="\t"}($12>1 && $15<=0.05){print $1}' 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv > 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.NC_high.ids
awk 'BEGIN{FS=OFS="\t"}($12<-1 && $15<=0.05){print $1}' 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.tsv > 05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.HCC_high.ids

library('gplots')

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:IRanges':
## 
##     space

## The following object is masked from 'package:S4Vectors':
## 
##     space

## The following object is masked from 'package:stats':
## 
##     lowess

mx <- read.table("05.diffexp/hcc_example.miRNA.NCvsHCC.DESeq2.ids.rpkm.forPlot",sep="\t",head=T)
log2mx <- log2(mx+0.05)
#pdf(width=4,height=6)
heatmap.2(as.matrix(log2mx), Rowv=T, Colv=T, dendrogram = "both", trace = "none", density.info = c("none"), scale="row", cexCol=0.5)