This notebook will plot the filtering and clustering results from the core analysis workflow.

Filtering results for library01

# Extract sce metadata
processed_meta <- metadata(processed_sce)

# Define number of cells
num_cells_pre_processed <- dim(pre_processed_sce)[2]
num_filtered_cells <- processed_meta$num_filtered_cells_retained

# Define sample id
if(is.null(processed_meta$sample)){
  sample <- NA
} else {
  sample <- processed_meta$sample
}

# Put a sample information data frame together
sample_information <- tibble::tibble(
  "Sample id" = sample,
  "Library id" = params$library,
  "Cells before filtering" = 
    format(num_cells_pre_processed, big.mark = ',', scientific = FALSE),
  "Cells after filtering" = 
    format(num_filtered_cells, big.mark = ',', scientific = FALSE),
  "Filtering type" = 
    format(processed_meta$filtering, big.mark = ',', scientific = FALSE)
)

if (processed_meta$filtering == "manually filtered"){
  
  sample_information <- sample_information %>%
    mutate(
      "UMI count cutoff" = 
        format(processed_meta$umi_count_cutoff, big.mark = ',', scientific = FALSE),
      "Percent mito cutoff" = 
        format(processed_meta$mito_percent_cutoff, big.mark = ',', scientific = FALSE),
      "Detected genes cutoff" = 
        format(processed_meta$detected_gene_cutoff, big.mark = ',', scientific = FALSE)
    )
}

if (processed_meta$filtering == "miQC filtered" ){
  
  sample_information <- sample_information %>%
    mutate("Probability compromised cutoff" = 
             format(processed_meta$probability_compromised_cutoff, big.mark = ',', scientific = FALSE))
}

sample_information <- sample_information %>%
  mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% # reformat nulls
  t()

# Make the table with sample information
knitr::kable(sample_information, align = 'r') %>%
  kableExtra::kable_styling(bootstrap_options = "striped",
                            full_width = FALSE,
                            position = "left") %>%
  kableExtra::column_spec(2, monospace = TRUE)
Sample id sample01
Library id library01
Cells before filtering 1,707
Cells after filtering 1,268
Filtering type miQC filtered
Probability compromised cutoff 0.75

Filtering plots

The filtering plots below represent the cutoffs used to filter and remove low-quality cells. When miQC filtering is implemented, two plots are displayed. miQC relies on fitting a mixture model using the number of genes expressed by a cell and the percentage of mitochondrial reads. The first plot will show the model fit by miQC with two model fit lines and each cell being colored by the probability of that cell being compromised. The second plot indicates which cells have been removed based on the cutoff used for a cell’s probability of being compromised. Cells labeled with TRUE are kept for downstream analysis, while all other cells were removed. When manual filtering is implemented, one plot is displayed. The plot in this case represents the manual filtering cutoffs for the minimum number of genes detected per cell and the minimum unique molecular identifiers (UMI) per cell. The lines on the plot indicate the cutoffs, where cells with number of genes expressed and unique molecular identifiers (UMI) that are both above the minimum cutoffs are kept for downstream analysis. Additionally, cells are colored by percent mitochondrial reads. Cells with percent mitochondrial reads above the provided cutoff are removed prior to downstream analysis.

Here, cells were miQC filtered.

# Read in mito genes
mito_genes <- unique(readLines(params$mito_file))

if (is.null(pre_processed_sce$detected)) {
  # Add cell QC metrics to pre processed sce needed for plotting
  pre_processed_sce <- scpcaTools::add_cell_mito_qc(pre_processed_sce, mito_genes)
}
if (processed_meta$filtering == "manually filtered"){
  # Plot manual filtering thresholds
  filtered_cell_plot <- plot_manual_filtering(
    sce = pre_processed_sce,
    detected_gene_cutoff = processed_meta$detected_gene_cutoff,
    umi_count_cutoff = processed_meta$umi_count_cutoff
  )
}
if (processed_meta$filtering == "miQC filtered" ){
  # Remove `prob_compromised` if it exists
  pre_processed_sce$prob_compromised <- NULL
  # Plot model
  filtered_model_plot <- miQC::plotModel(pre_processed_sce, processed_meta$miQC_model)
  # Plot filtering
  filtered_cell_plot <- miQC::plotFiltering(pre_processed_sce, 
                                            processed_meta$miQC_model,
                                            posterior_cutoff = processed_meta$probability_compromised_cutoff)
  # Combine plots
  filtered_cell_plot <-
    ggarrange(filtered_model_plot,
              filtered_cell_plot,
              ncol = 1,
              nrow = 2)
}
  
filtered_cell_plot

Clustering results for library01

Below is a UMAP plot where each cell has been colored based on the cell’s cluster assignment. Here clustering was performed using louvain clustering with a nearest neighbors value of 10.

# Grab column name with clustering results
cluster_name <- paste(params$cluster_type, params$nearest_neighbors, sep = "_")

# Plot
plotReducedDim(
  processed_sce,
  dimred = "UMAP",
  colour_by = cluster_name,
  text_by = cluster_name,
  text_size = 4,
  point_size = 0.4,
  point_alpha = 0.5
) +
  theme_bw() +
  labs(
    caption = paste0(
      stringr::str_to_title(params$cluster_type),
      " clustering with nearest neighbors value ",
      params$nearest_neighbors
    )
  ) +
  guides(col = guide_legend("Cluster assignment", override.aes = list(size = 3))) +
  theme(text = element_text(size = 14),
        plot.caption = element_text(hjust = 0.5))

Session info

R session information
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices datasets  utils     methods  
## [8] base     
## 
## other attached packages:
##  [1] scater_1.26.0               scuttle_1.8.0              
##  [3] ggpubr_0.4.0                miQC_1.6.0                 
##  [5] dplyr_1.0.10                SingleCellExperiment_1.20.0
##  [7] SummarizedExperiment_1.28.0 Biobase_2.58.0             
##  [9] GenomicRanges_1.50.0        GenomeInfoDb_1.34.1        
## [11] IRanges_2.32.0              S4Vectors_0.36.0           
## [13] BiocGenerics_0.44.0         MatrixGenerics_1.10.0      
## [15] matrixStats_0.62.0          readr_2.1.3                
## [17] ggplot2_3.4.0              
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-7              webshot_0.5.4            
##  [3] httr_1.4.4                tools_4.2.1              
##  [5] backports_1.4.1           bslib_0.4.1              
##  [7] utf8_1.2.2                R6_2.5.1                 
##  [9] irlba_2.3.5.1             vipor_0.4.5              
## [11] colorspace_2.0-3          nnet_7.3-18              
## [13] withr_2.5.0               gridExtra_2.3            
## [15] tidyselect_1.2.0          compiler_4.2.1           
## [17] rvest_1.0.3               cli_3.4.1                
## [19] BiocNeighbors_1.16.0      xml2_1.3.3               
## [21] DelayedArray_0.24.0       labeling_0.4.2           
## [23] sass_0.4.2                scales_1.2.1             
## [25] systemfonts_1.0.4         stringr_1.4.1            
## [27] digest_0.6.30             svglite_2.1.0            
## [29] rmarkdown_2.17            XVector_0.38.0           
## [31] pkgconfig_2.0.3           htmltools_0.5.3          
## [33] sparseMatrixStats_1.10.0  highr_0.9                
## [35] fastmap_1.1.0             rlang_1.0.6              
## [37] rstudioapi_0.14           DelayedMatrixStats_1.20.0
## [39] farver_2.1.1              jquerylib_0.1.4          
## [41] generics_0.1.3            jsonlite_1.8.3           
## [43] BiocParallel_1.32.0       car_3.1-1                
## [45] RCurl_1.98-1.9            magrittr_2.0.3           
## [47] BiocSingular_1.14.0       kableExtra_1.3.4         
## [49] modeltools_0.2-23         GenomeInfoDbData_1.2.9   
## [51] Matrix_1.5-1              Rcpp_1.0.9               
## [53] ggbeeswarm_0.6.0          munsell_0.5.0            
## [55] fansi_1.0.3               viridis_0.6.2            
## [57] abind_1.4-5               lifecycle_1.0.3          
## [59] stringi_1.7.8             yaml_2.3.6               
## [61] carData_3.0-5             zlibbioc_1.44.0          
## [63] flexmix_2.3-18            grid_4.2.1               
## [65] ggrepel_0.9.1             parallel_4.2.1           
## [67] lattice_0.20-45           cowplot_1.1.1            
## [69] beachmat_2.14.0           splines_4.2.1            
## [71] hms_1.1.2                 knitr_1.40               
## [73] pillar_1.8.1              ggsignif_0.6.4           
## [75] codetools_0.2-18          ScaledMatrix_1.6.0       
## [77] glue_1.6.2                evaluate_0.17            
## [79] renv_0.16.0               vctrs_0.5.0              
## [81] tzdb_0.3.0                gtable_0.3.1             
## [83] purrr_0.3.5               tidyr_1.2.1              
## [85] cachem_1.0.6              xfun_0.34                
## [87] rsvd_1.0.5                broom_1.0.1              
## [89] rstatix_0.7.0             viridisLite_0.4.1        
## [91] tibble_3.1.8              beeswarm_0.4.0           
## [93] ellipsis_0.3.2