set up

Let’s define some parameters so it can be adapted to different analysis.

R> params=list(
+     se_file= "data/gse.rds",
+     design= "~ group",
+     contrast= "group.E_vs_D",
+     metadata= c("group"), # first one used in some plots
+     alpha= 0.05,
+     lfc= 0,
+     slot= "vst",
+     output_dir= "docs/results",
+     cache_dir= "cache",
+     cache_on= FALSE)

Parent folder

R> message(getwd())

Check params

R> library(SummarizedExperiment)
R> library(stringr)
R> 
R> # add check function
R> if(!file.exists(params$se_file))
+     stop("se_file isn't found: ", params$se_file)
R> 
R> se = readRDS(params$se_file)
R> stopifnot(class(se) == "SummarizedExperiment" | class(se) == "RangedSummarizedExperiment")
R> 
R> stopifnot(sum(params$metadata %in% colnames(colData(se))) == length(params$metadata))
R> 
R> comparisons = str_split(params$contrast, ",")[[1]]
R> contrasts = lapply(comparisons, function(c){
+     m = str_match(c, "(.+)\\.(.+)_vs_(.+)") %>% 
+         as.vector()
+ 
+     if( ! m[2] %in% colnames(colData(se)))
+         stop(c, "in column ", m[2]," not in colnames: ",
+              paste(colnames(colData(se)), collapse = " ") )
+     values = unique(colData(se)[[m[2]]])
+     if(! m[3] %in% colData(se)[[m[2]]])
+         stop("value ", m[3], " not in column values: ",
+              paste(values))
+     if(! m[4] %in% colData(se)[[m[2]]])
+         stop("value ", m[4], " not in column values: ",
+              paste(values))
+     c(m[2], m[3], m[4])
+ })

Load data

R> # Set seed for reproducibility
R> set.seed(1454944673)
R> library(knitr)
R> library(ggplot2)
R> 
R> opts_chunk[["set"]](
+     autodep = TRUE,
+     bootstrap.show.code = FALSE,
+     cache = params$cache_on,
+     cache.lazy = params$cache_on,
+     cache.path = params$cache_dir,
+     dev = c("png", "pdf"),
+     error = FALSE,
+     fig.height = 7,
+     fig.retina = 2,
+     fig.width = 7,
+     highlight = TRUE,
+     message = FALSE,
+     prompt = TRUE,
+     # formatR required for tidy code
+     tidy = TRUE,
+     warning = FALSE)
R> 
R> theme_set(
+     theme_light(base_size = 14))
R> theme_update(
+     legend.justification = "center",
+     legend.position = "bottom")

R> library(DESeq2)
R> library(DEGreport)
R> library(pheatmap)
R> library(tidyverse)
R> library(readr)
R> 
R> lfc = params$lfc
R> alpha = params$alpha
R> # Directory paths
R> output_dir <- params$output_dir
R> dataDir <- file.path(params$output_dir, "data")
R> dir.create(dataDir, showWarnings = FALSE, recursive = TRUE)
R> countsDir <- file.path(output_dir, "results", "counts")
R> dir.create(countsDir, showWarnings = FALSE, recursive = TRUE)
R> deDir <- file.path(output_dir, "results", "differential_expression")
R> dir.create(deDir, showWarnings = FALSE, recursive = TRUE)

R> # help('design', 'DESeq2')
R> dds <- DESeqDataSetFromMatrix(countData = round(assay(se)), colData = colData(se), 
+     design = formula(params$design)) %>% DESeq()
R> rld <- varianceStabilizingTransformation(dds)
R> save(dds, rld, file = file.path(dataDir, "dds.rda"))

R> load(file.path(dataDir, "dds.rda"))

R> as.data.frame(assay(se)) %>% rownames_to_column("gene_id") %>% left_join(as.data.frame(rowData(se)), 
+     by = "gene_id") %>% write_csv(file.path(countsDir, "raw.csv.gz"))
R> 
R> as.data.frame(assay(rld)) %>% rownames_to_column("gene_id") %>% left_join(as.data.frame(rowData(se)), 
+     by = "gene_id") %>% write_csv(file.path(countsDir, "log2_normalized.csv.gz"))

Quality metrics

Full quality metrics can be found at [multiqc_report.html].

Number of genes

We calculate the number of genes with counts > 0 in each sample.

R> data.frame(samples = colnames(se), num_genes = colSums(assay(se) > 0)) %>% ggplot(aes(samples, 
+     num_genes)) + geom_bar(stat = "identity") + coord_flip()

PCA

Principal component analysis is a technique to reduce the dimensionality of the data to allow visualization in two dimensions PCA. It takes all the gene abundances for the samples and creates a series of principal components (PCs) to explain the variability in the data. We normally plot the first two PCs for simplicity.

R> assays(se)[["vst"]] = assay(rld)
R> 
R> degPCA(assays(se)[[params$slot]], colData(se), condition = params$metadata[1])

R> # + ggrepel::geom_text_repel(aes(label=Name)) if you wish to add sample
R> # names

Results

We calculate the different contrasts (comparisons) in a single step with the degComps function, and all results are stored in a single variable.

R> # ?degComps: Read how to get the different contrasts for coefficients from
R> # one column: degComps(dds, combs = column_name) for all possible paris:
R> # degComps(dds, combs = column_name, pairs = TRUE) for specific contrasts:
R> # degComps(dds, contrasts = list(c(column_name, group1, group2),
R> # c(column_name, group1, group3)))
R> 
R> comparisons = degComps(dds, contrast = contrasts, type = "ashr")
R> 
R> save(comparisons, file = file.path(dataDir, "comparisons.rda"))

R> lapply(file.path(dataDir, c("dds.rda", "comparisons.rda")), load, environment()) %>% 
+     invisible()

We performed the analysis using a BH adjusted P value cutoff of 0.05 and a log fold-change (LFC) ratio cutoff of 0.

Alpha level (FDR) cutoffs

Let’s take a look at the number of genes we get with different false discovery rate (FDR) cutoffs. These tests subset P values that have been multiple test corrected using the Benjamini Hochberg (BH) method [@Benjamini:1995ws].

R> lapply(names(comparisons), function(x) {
+     cat("### ", x, "\n")
+     degSummary(comparisons[[x]], kable = TRUE) %>% show
+     cat("\n\n")
+ }) %>% invisible()

group_E_vs_D

	0.1	0.05	0.01
LFC > 0 (up)	LFC > 0 (up) : 533, 3%	LFC > 0 (up) : 402, 2.3%	LFC > 0 (up) : 206, 1.2%
LFC < 0 (down)	LFC < 0 (down) : 1016, 5.7%	LFC < 0 (down) : 883, 5%	LFC < 0 (down) : 709, 4%
outliers	outliers [1] : 0, 0%	outliers [1] : 0, 0%	outliers [1] : 0, 0%
low counts	low counts [2] : 9521, 54%	low counts [2] : 9521, 54%	low counts [2] : 9521, 54%
cutoff	(mean count < 7)	(mean count < 7)	(mean count < 7)
NULL

Plots

Mean average (MA)

An MA plot compares transformed counts on M (log ratio) and A (mean average) scales [@Yang:2002ty]. Blue arrows represent a correction of the log2 Fold Change values due to variability. Normally this happens when the gene shows high variation and the log2FC is not accurate, here the model tries to estimate them. See this paper for more information:

Love, M.I., Huber, W., Anders, S. (2014) Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biology, 15:550. https://doi.org/10.1186/s13059-014-0550-8

R> lapply(names(comparisons), function(x) {
+     cat("### ", x, "\n")
+     print(degMA(comparisons[[x]], limit = 5))
+     cat("\n\n")
+ }) %>% invisible()

group_E_vs_D

Volcano

A volcano plot compares significance (BH-adjusted P value) against fold change (log2) [@Cui:2003kh; @Li:2014fv]. Genes in the green box with text labels have an adjusted P value are likely to be the top candidate genes of interest.

R> lapply(names(comparisons), function(x) {
+     cat("### ", x, "\n")
+     print(degVolcano(comparisons[[x]]))
+     cat("\n\n")
+ }) %>% invisible()

group_E_vs_D

Heatmap

This plot shows only differentially expressed genes on a per-sample basis. We have scaled the data by row and used the ward.D2 method for clustering [@WardJr:1963eu].

R> # lapply(names(comparisons), function(x){
R> for (x in names(comparisons)) {
+     cat("### ", x, "\n")
+     s = significants(comparisons[[x]])
+     if (length(s) < 2) {
+         cat("Less than two genes, no possible to plot")
+     } else {
+         p = pheatmap(assays(se)[[params$slot]][s, ], scale = "row", annotation_col = as.data.frame(colData(se)[, 
+             params$metadata, drop = F]), show_rownames = FALSE, show_colnames = FALSE, 
+             clustering_method = "ward.D2", clustering_distance_cols = "correlation")
+         print(p)
+     }
+     cat("\n\n")
+ }

group_E_vs_D

PCA

R> lapply(names(comparisons), function(x) {
+     cat("### ", x, "\n")
+     s = significants(comparisons[[x]])
+     if (length(s) < 2) {
+         cat("Less than two genes, no possible to plot")
+     } else {
+         degPCA(assays(se)[[params$slot]][s, ], colData(se), condition = params$metadata[1]) %>% 
+             print
+         
+     }
+     cat("\n\n")
+ }) %>% invisible()

group_E_vs_D

Gene Expression Patterns

In general, it is useful to cluster the significant genes together in similar patterns across samples. degPatterns uses standard expression correlation technique to generate a similarity matrix that can be clustered hierarchically and then split into groups of genes that follow similar expression patterns degPatterns.

We defined significance as genes with abs(log2FC) > and FDR < .

R> # choose the significants genes.  In this case the first is used. `sig`
R> # should be a character vector with genes.
R> sig = significants(comparisons, fc = params$lfc, padj = params$alpha)
R> if (length(sig) > 20) {
+     colData(se)[[params$metadata[1]]] = as.factor(colData(se)[[params$metadata[1]]])
+     pattern = degPatterns(assays(se)[[params$slot]][sig, ], colData(se), time = params$metadata[1], 
+         minc = length(sig) * 0.01)
+ } else {
+     cat("Not enough genes to make a pattern analysis.")
+ }

Top genes

R> resTbl = lapply(names(comparisons), function(x) {
+     cat("### ", x, "\n")
+     sign = significants(comparisons[[x]])
+     if (length(sign) == 0) {
+         cat("\n\n**Not significant genes**\n\n")
+         cat("\n\n")
+         return(NULL)
+     }
+     n = min(15, length(sign))
+     p = degPlot(se, genes = sign[1:n], xs = params$metadata[1], slot = params$slot, 
+         log2 = FALSE, ann = c("gene_id", "gene_name")) + geom_boxplot(alpha = 0.3) + 
+         theme(axis.text.x = element_text(angle = 45, hjust = 0.5, vjust = 0.5))
+     print(p)
+     cat("\n\n")
+     res = deg(comparisons[[x]], tidy = "tibble") %>% left_join(rowData(se) %>% 
+         as.data.frame, by = c(gene = "gene_id"))
+     if (exists("pattern")) 
+         res = left_join(res, pattern[["df"]], by = c(gene = "genes"))
+     dir.create(file.path(deDir, x), showWarnings = FALSE, recursive = TRUE)
+     write_csv(res, file.path(deDir, x, paste0(x, ".csv.gz")))
+     res
+ })

group_E_vs_D

R> names(resTbl) = names(comparisons)

Top tables

Top 10 genes are shown order by False Discovery Rate. Genes below 0.05 are considered significant.

R> lapply(names(resTbl), function(x) {
+     cat("### ", x, "\n")
+     head(resTbl[[x]], 10) %>% kable %>% show
+     cat("\n\n")
+ }) %>% invisible()

group_E_vs_D

gene	baseMean	log2FoldChange	lfcSE	gene_name	cluster
ENSMUSG00000022868	15751.9	-6.301	0.2635	Ahsg	2
ENSMUSG00000031722	1666.4	-6.989	0.3141	Hp	2
ENSMUSG00000040564	4369.6	-5.198	0.2564	Apoc1	2
ENSMUSG00000039323	1968.6	-7.259	0.3719	Igfbp2	2
ENSMUSG00000022875	1578.0	-5.269	0.2737	Kng1	2
ENSMUSG00000000049	1523.7	-5.433	0.2891	Apoh	2
ENSMUSG00000054422	2443.4	-5.540	0.2970	Fabp1	2
ENSMUSG00000002985	15557.3	-4.470	0.2400	Apoe	2
ENSMUSG00000030359	843.2	-5.960	0.3407	Pzp	2
ENSMUSG00000030895	1734.3	-5.185	0.2986	Hpx	2

File downloads

The results are saved as gzip-compressed comma separated values (CSV). Gzip compression is natively supported on macOS and Linux-based operating systems. If you’re running Windows, we recommend installing 7-Zip. CSV files can be opened in Excel or RStudio.

Count matrices

Tables are under docs/results/results/counts folder:

normalizedCounts.csv.gz: Use to evaluate individual genes and/or generate plots. These counts are normalized for the variation in sequencing depth across samples.
tpm.csv.gz: Transcripts per million, scaled by length and also suitable for plotting.
rawCounts.csv.gz: Only use to perform a new differential expression analysis. These counts will vary across samples due to differences in sequencing depth, and have not been normalized. Do not use this file for plotting genes.

Differential expression tables

Tables are under docs/results/results/differential_expression folder:

DEG tables are sorted by BH-adjusted P value, and contain the following columns:

gene: Ensembl gene identifier.
baseMean: Mean of the normalized counts per gene for all samples.
log2FoldChange: log2 fold change.
lfcSE: log2 standard error.
stat: Wald statistic.
pvalue: Walt test P value.
padj: BH adjusted Wald test P value (corrected for multiple comparisons; aka FDR).
gene_name: Ensembl name (a.k.a. symbol).
cluster: Group defined in the Gene Expression Patterns section.

Methods

RNA-seq counts were generated by nf-core and nextflow using the rnaseq pipeline that integrates salmon for quantification. Counts were imported into R using tximport and DESeq2. Gene annotations were obtained from Ensembl. Plots were generated by ggplot2 . Heatmaps were generated by pheatmap. DE analysis plots were done by DEGreport.

R session information

R> devtools::session_info()

## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.0 (2019-04-26)
##  os       macOS Mojave 10.14.3        
##  system   x86_64, darwin15.6.0        
##  ui       RStudio                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2019-07-08                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package              * version   date       lib source        
##  acepack                1.4.1     2016-10-29 [1] CRAN (R 3.6.0)
##  annotate               1.62.0    2019-05-02 [1] Bioconductor  
##  AnnotationDbi          1.46.0    2019-05-02 [1] Bioconductor  
##  ashr                   2.2-32    2019-02-22 [1] CRAN (R 3.6.0)
##  assertthat             0.2.1     2019-03-21 [1] CRAN (R 3.6.0)
##  backports              1.1.4     2019-04-10 [1] CRAN (R 3.6.0)
##  base64enc              0.1-3     2015-07-28 [1] CRAN (R 3.6.0)
##  Biobase              * 2.44.0    2019-05-02 [1] Bioconductor  
##  BiocGenerics         * 0.30.0    2019-05-02 [1] Bioconductor  
##  BiocParallel         * 1.18.0    2019-05-02 [1] Bioconductor  
##  bit                    1.1-14    2018-05-29 [1] CRAN (R 3.6.0)
##  bit64                  0.9-7     2017-05-08 [1] CRAN (R 3.6.0)
##  bitops                 1.0-6     2013-08-17 [1] CRAN (R 3.6.0)
##  blob                   1.1.1     2018-03-25 [1] CRAN (R 3.6.0)
##  broom                  0.5.2     2019-04-07 [1] CRAN (R 3.6.0)
##  callr                  3.2.0     2019-03-15 [1] CRAN (R 3.6.0)
##  cellranger             1.1.0     2016-07-27 [1] CRAN (R 3.6.0)
##  checkmate              1.9.3     2019-05-03 [1] CRAN (R 3.6.0)
##  circlize               0.4.6     2019-04-03 [1] CRAN (R 3.6.0)
##  cli                    1.1.0     2019-03-19 [1] CRAN (R 3.6.0)
##  clue                   0.3-57    2019-02-25 [1] CRAN (R 3.6.0)
##  cluster                2.0.9     2019-05-01 [1] CRAN (R 3.6.0)
##  codetools              0.2-16    2018-12-24 [1] CRAN (R 3.6.0)
##  colorspace             1.4-1     2019-03-18 [1] CRAN (R 3.6.0)
##  ComplexHeatmap         2.0.0     2019-05-02 [1] Bioconductor  
##  ConsensusClusterPlus   1.48.0    2019-05-02 [1] Bioconductor  
##  cowplot                0.9.4     2019-01-08 [1] CRAN (R 3.6.0)
##  crayon                 1.3.4     2017-09-16 [1] CRAN (R 3.6.0)
##  data.table             1.12.2    2019-04-07 [1] CRAN (R 3.6.0)
##  DBI                    1.0.0     2018-05-02 [1] CRAN (R 3.6.0)
##  DEGreport            * 1.20.0    2019-05-02 [1] Bioconductor  
##  DelayedArray         * 0.10.0    2019-05-02 [1] Bioconductor  
##  desc                   1.2.0     2018-05-01 [1] CRAN (R 3.6.0)
##  DESeq2               * 1.24.0    2019-05-02 [1] Bioconductor  
##  devtools               2.0.2     2019-04-08 [1] CRAN (R 3.6.0)
##  digest                 0.6.19    2019-05-20 [1] CRAN (R 3.6.0)
##  doParallel             1.0.14    2018-09-24 [1] CRAN (R 3.6.0)
##  dplyr                * 0.8.1     2019-05-14 [1] CRAN (R 3.6.0)
##  edgeR                  3.26.4    2019-05-27 [1] Bioconductor  
##  evaluate               0.14      2019-05-28 [1] CRAN (R 3.6.0)
##  forcats              * 0.4.0     2019-02-17 [1] CRAN (R 3.6.0)
##  foreach                1.4.4     2017-12-12 [1] CRAN (R 3.6.0)
##  foreign                0.8-71    2018-07-20 [1] CRAN (R 3.6.0)
##  formatR                1.6       2019-03-05 [1] CRAN (R 3.6.0)
##  Formula                1.2-3     2018-05-03 [1] CRAN (R 3.6.0)
##  fs                     1.3.1     2019-05-06 [1] CRAN (R 3.6.0)
##  genefilter             1.66.0    2019-05-02 [1] Bioconductor  
##  geneplotter            1.62.0    2019-05-02 [1] Bioconductor  
##  generics               0.0.2     2018-11-29 [1] CRAN (R 3.6.0)
##  GenomeInfoDb         * 1.20.0    2019-05-02 [1] Bioconductor  
##  GenomeInfoDbData       1.2.1     2019-05-06 [1] Bioconductor  
##  GenomicRanges        * 1.36.0    2019-05-02 [1] Bioconductor  
##  GetoptLong             0.1.7     2018-06-10 [1] CRAN (R 3.6.0)
##  ggdendro               0.1-20    2016-04-27 [1] CRAN (R 3.6.0)
##  ggplot2              * 3.1.1     2019-04-07 [1] CRAN (R 3.6.0)
##  ggrepel                0.8.1     2019-05-07 [1] CRAN (R 3.6.0)
##  GlobalOptions          0.1.0     2018-06-09 [1] CRAN (R 3.6.0)
##  glue                   1.3.1     2019-03-12 [1] CRAN (R 3.6.0)
##  gridExtra              2.3       2017-09-09 [1] CRAN (R 3.6.0)
##  gtable                 0.3.0     2019-03-25 [1] CRAN (R 3.6.0)
##  haven                  2.1.0     2019-02-19 [1] CRAN (R 3.6.0)
##  highr                  0.8       2019-03-20 [1] CRAN (R 3.6.0)
##  Hmisc                  4.2-0     2019-01-26 [1] CRAN (R 3.6.0)
##  hms                    0.4.2     2018-03-10 [1] CRAN (R 3.6.0)
##  htmlTable              1.13.1    2019-01-07 [1] CRAN (R 3.6.0)
##  htmltools              0.3.6     2017-04-28 [1] CRAN (R 3.6.0)
##  htmlwidgets            1.3       2018-09-30 [1] CRAN (R 3.6.0)
##  httr                   1.4.0     2018-12-11 [1] CRAN (R 3.6.0)
##  IRanges              * 2.18.1    2019-05-31 [1] Bioconductor  
##  iterators              1.0.10    2018-07-13 [1] CRAN (R 3.6.0)
##  jsonlite               1.6       2018-12-07 [1] CRAN (R 3.6.0)
##  knitr                * 1.23      2019-05-18 [1] CRAN (R 3.6.0)
##  labeling               0.3       2014-08-23 [1] CRAN (R 3.6.0)
##  lasso2                 1.2-20    2018-11-27 [1] CRAN (R 3.6.0)
##  lattice                0.20-38   2018-11-04 [1] CRAN (R 3.6.0)
##  latticeExtra           0.6-28    2016-02-09 [1] CRAN (R 3.6.0)
##  lazyeval               0.2.2     2019-03-15 [1] CRAN (R 3.6.0)
##  limma                  3.40.2    2019-05-17 [1] Bioconductor  
##  locfit                 1.5-9.1   2013-04-20 [1] CRAN (R 3.6.0)
##  logging                0.9-107   2019-02-10 [1] CRAN (R 3.6.0)
##  lubridate              1.7.4     2018-04-11 [1] CRAN (R 3.6.0)
##  magrittr               1.5       2014-11-22 [1] CRAN (R 3.6.0)
##  MASS                   7.3-51.4  2019-03-31 [1] CRAN (R 3.6.0)
##  Matrix                 1.2-17    2019-03-22 [1] CRAN (R 3.6.0)
##  matrixStats          * 0.54.0    2018-07-23 [1] CRAN (R 3.6.0)
##  memoise                1.1.0     2017-04-21 [1] CRAN (R 3.6.0)
##  mixsqp                 0.1-97    2019-02-18 [1] CRAN (R 3.6.0)
##  mnormt                 1.5-5     2016-10-15 [1] CRAN (R 3.6.0)
##  modelr                 0.1.4     2019-02-18 [1] CRAN (R 3.6.0)
##  munsell                0.5.0     2018-06-12 [1] CRAN (R 3.6.0)
##  nlme                   3.1-140   2019-05-12 [1] CRAN (R 3.6.0)
##  nnet                   7.3-12    2016-02-02 [1] CRAN (R 3.6.0)
##  Nozzle.R1              1.1-1     2013-05-15 [1] CRAN (R 3.6.0)
##  packrat                0.5.0     2018-11-14 [1] CRAN (R 3.6.0)
##  pheatmap             * 1.0.12    2019-01-04 [1] CRAN (R 3.6.0)
##  pillar                 1.4.1     2019-05-28 [1] CRAN (R 3.6.0)
##  pkgbuild               1.0.3     2019-03-20 [1] CRAN (R 3.6.0)
##  pkgconfig              2.0.2     2018-08-16 [1] CRAN (R 3.6.0)
##  pkgload                1.0.2     2018-10-29 [1] CRAN (R 3.6.0)
##  plyr                   1.8.4     2016-06-08 [1] CRAN (R 3.6.0)
##  png                    0.1-7     2013-12-03 [1] CRAN (R 3.6.0)
##  prettyunits            1.0.2     2015-07-13 [1] CRAN (R 3.6.0)
##  processx               3.3.1     2019-05-08 [1] CRAN (R 3.6.0)
##  ps                     1.3.0     2018-12-21 [1] CRAN (R 3.6.0)
##  pscl                   1.5.2     2017-10-10 [1] CRAN (R 3.6.0)
##  psych                  1.8.12    2019-01-12 [1] CRAN (R 3.6.0)
##  purrr                * 0.3.2     2019-03-15 [1] CRAN (R 3.6.0)
##  R6                     2.4.0     2019-02-14 [1] CRAN (R 3.6.0)
##  RColorBrewer           1.1-2     2014-12-07 [1] CRAN (R 3.6.0)
##  Rcpp                   1.0.1     2019-03-17 [1] CRAN (R 3.6.0)
##  RCurl                  1.95-4.12 2019-03-04 [1] CRAN (R 3.6.0)
##  readr                * 1.3.1     2018-12-21 [1] CRAN (R 3.6.0)
##  readxl                 1.3.1     2019-03-13 [1] CRAN (R 3.6.0)
##  remotes                2.0.4     2019-04-10 [1] CRAN (R 3.6.0)
##  reshape                0.8.8     2018-10-23 [1] CRAN (R 3.6.0)
##  rjson                  0.2.20    2018-06-08 [1] CRAN (R 3.6.0)
##  rlang                  0.3.4     2019-04-07 [1] CRAN (R 3.6.0)
##  rmarkdown              1.13      2019-05-22 [1] CRAN (R 3.6.0)
##  rpart                  4.1-15    2019-04-12 [1] CRAN (R 3.6.0)
##  rprojroot              1.3-2     2018-01-03 [1] CRAN (R 3.6.0)
##  RSQLite                2.1.1     2018-05-06 [1] CRAN (R 3.6.0)
##  rstudioapi             0.10      2019-03-19 [1] CRAN (R 3.6.0)
##  rvest                  0.3.4     2019-05-15 [1] CRAN (R 3.6.0)
##  S4Vectors            * 0.22.0    2019-05-02 [1] Bioconductor  
##  scales                 1.0.0     2018-08-09 [1] CRAN (R 3.6.0)
##  sessioninfo            1.1.1     2018-11-05 [1] CRAN (R 3.6.0)
##  shape                  1.4.4     2018-02-07 [1] CRAN (R 3.6.0)
##  SQUAREM                2017.10-1 2017-10-07 [1] CRAN (R 3.6.0)
##  stringi                1.4.3     2019-03-12 [1] CRAN (R 3.6.0)
##  stringr              * 1.4.0     2019-02-10 [1] CRAN (R 3.6.0)
##  SummarizedExperiment * 1.14.0    2019-05-02 [1] Bioconductor  
##  survival               2.44-1.1  2019-04-01 [1] CRAN (R 3.6.0)
##  testthat               2.1.1     2019-04-23 [1] CRAN (R 3.6.0)
##  tibble               * 2.1.2     2019-05-29 [1] CRAN (R 3.6.0)
##  tidyr                * 0.8.3     2019-03-01 [1] CRAN (R 3.6.0)
##  tidyselect             0.2.5     2018-10-11 [1] CRAN (R 3.6.0)
##  tidyverse            * 1.2.1     2017-11-14 [1] CRAN (R 3.6.0)
##  truncnorm              1.0-8     2018-02-27 [1] CRAN (R 3.6.0)
##  tximport             * 1.12.0    2019-05-02 [1] Bioconductor  
##  usethis                1.5.0     2019-04-07 [1] CRAN (R 3.6.0)
##  withr                  2.1.2     2018-03-15 [1] CRAN (R 3.6.0)
##  xfun                   0.7       2019-05-14 [1] CRAN (R 3.6.0)
##  XML                    3.98-1.19 2019-03-06 [1] CRAN (R 3.6.0)
##  xml2                   1.2.0     2018-01-24 [1] CRAN (R 3.6.0)
##  xtable                 1.8-4     2019-04-21 [1] CRAN (R 3.6.0)
##  XVector                0.24.0    2019-05-02 [1] Bioconductor  
##  yaml                   2.2.0     2018-07-25 [1] CRAN (R 3.6.0)
##  zlibbioc               1.30.0    2019-05-02 [1] Bioconductor  
## 
## [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library

R> print(params)

## $se_file
## [1] "data/gse.rds"
## 
## $design
## [1] "~ group"
## 
## $contrast
## [1] "group.E_vs_D"
## 
## $metadata
## [1] "group"
## 
## $alpha
## [1] 0.05
## 
## $lfc
## [1] 0
## 
## $slot
## [1] "vst"
## 
## $output_dir
## [1] "docs/results"
## 
## $cache_dir
## [1] "cache"
## 
## $cache_on
## [1] FALSE

Next lesson

These materials have been developed by members of the teaching team at the PILM - MIT Bioinformatics Core (PILMBC). These are open access materials distributed under the terms of the Creative Commons Attribution license (CC BY 4.0), which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.

Differential Expression for multiple contrasts

Lorena Pantano

2019-07-08

set up

Parent folder

Check params

Load data

Quality metrics

Number of genes

PCA

Results

Alpha level (FDR) cutoffs

group_E_vs_D

Plots

Mean average (MA)

group_E_vs_D

Volcano

group_E_vs_D

Heatmap

group_E_vs_D

PCA

group_E_vs_D

Gene Expression Patterns

Top genes

group_E_vs_D

Top tables

group_E_vs_D

File downloads

Count matrices

Differential expression tables

Methods

R session information