This notebook will plot the filtering and clustering results from the core analysis workflow.
# Extract sce metadata
processed_meta <- metadata(processed_sce)
# Define number of cells
num_cells_pre_processed <- dim(pre_processed_sce)[2]
num_filtered_cells <- processed_meta$num_filtered_cells_retained
# Define sample id
if(is.null(processed_meta$sample)){
sample <- NA
} else {
sample <- processed_meta$sample
}
# Put a sample information data frame together
sample_information <- tibble::tibble(
"Sample id" = sample,
"Library id" = params$library,
"Cells before filtering" =
format(num_cells_pre_processed, big.mark = ',', scientific = FALSE),
"Cells after filtering" =
format(num_filtered_cells, big.mark = ',', scientific = FALSE),
"Filtering type" =
format(processed_meta$filtering, big.mark = ',', scientific = FALSE)
)
if (processed_meta$filtering == "manually filtered"){
sample_information <- sample_information %>%
mutate(
"UMI count cutoff" =
format(processed_meta$umi_count_cutoff, big.mark = ',', scientific = FALSE),
"Percent mito cutoff" =
format(processed_meta$mito_percent_cutoff, big.mark = ',', scientific = FALSE),
"Detected genes cutoff" =
format(processed_meta$detected_gene_cutoff, big.mark = ',', scientific = FALSE)
)
}
if (processed_meta$filtering == "miQC filtered" ){
sample_information <- sample_information %>%
mutate("Probability compromised cutoff" =
format(processed_meta$probability_compromised_cutoff, big.mark = ',', scientific = FALSE))
}
sample_information <- sample_information %>%
mutate(across(.fns = ~ifelse(.x == "NULL", "N/A", .x))) %>% # reformat nulls
t()
# Make the table with sample information
knitr::kable(sample_information, align = 'r') %>%
kableExtra::kable_styling(bootstrap_options = "striped",
full_width = FALSE,
position = "left") %>%
kableExtra::column_spec(2, monospace = TRUE)
| Sample id | sample01 |
| Library id | library01 |
| Cells before filtering | 1,707 |
| Cells after filtering | 1,268 |
| Filtering type | miQC filtered |
| Probability compromised cutoff | 0.75 |
The filtering plots below represent the cutoffs used to filter and
remove low-quality cells. When miQC filtering is
implemented, two plots are displayed. miQC relies on
fitting a mixture model using the number of genes expressed by a cell
and the percentage of mitochondrial reads. The first plot will show the
model fit by miQC with two model fit lines and each cell
being colored by the probability of that cell being compromised. The
second plot indicates which cells have been removed based on the cutoff
used for a cell’s probability of being compromised. Cells labeled with
TRUE are kept for downstream analysis, while all other
cells were removed. When manual filtering is implemented,
one plot is displayed. The plot in this case represents the manual
filtering cutoffs for the minimum number of genes detected per cell and
the minimum unique molecular identifiers (UMI) per cell. The lines on
the plot indicate the cutoffs, where cells with number of genes
expressed and unique molecular identifiers (UMI) that are both above the
minimum cutoffs are kept for downstream analysis. Additionally, cells
are colored by percent mitochondrial reads. Cells with percent
mitochondrial reads above the provided cutoff are removed prior to
downstream analysis.
Here, cells were miQC filtered.
# Read in mito genes
mito_genes <- unique(readLines(params$mito_file))
if (is.null(pre_processed_sce$detected)) {
# Add cell QC metrics to pre processed sce needed for plotting
pre_processed_sce <- scpcaTools::add_cell_mito_qc(pre_processed_sce, mito_genes)
}
if (processed_meta$filtering == "manually filtered"){
# Plot manual filtering thresholds
filtered_cell_plot <- plot_manual_filtering(
sce = pre_processed_sce,
detected_gene_cutoff = processed_meta$detected_gene_cutoff,
umi_count_cutoff = processed_meta$umi_count_cutoff
)
}
if (processed_meta$filtering == "miQC filtered" ){
# Remove `prob_compromised` if it exists
pre_processed_sce$prob_compromised <- NULL
# Plot model
filtered_model_plot <- miQC::plotModel(pre_processed_sce, processed_meta$miQC_model)
# Plot filtering
filtered_cell_plot <- miQC::plotFiltering(pre_processed_sce,
processed_meta$miQC_model,
posterior_cutoff = processed_meta$probability_compromised_cutoff)
# Combine plots
filtered_cell_plot <-
ggarrange(filtered_model_plot,
filtered_cell_plot,
ncol = 1,
nrow = 2)
}
filtered_cell_plot
Below is a UMAP plot where each cell has been colored based on the cell’s cluster assignment. Here clustering was performed using louvain clustering with a nearest neighbors value of 10.
# Grab column name with clustering results
cluster_name <- paste(params$cluster_type, params$nearest_neighbors, sep = "_")
# Plot
plotReducedDim(
processed_sce,
dimred = "UMAP",
colour_by = cluster_name,
text_by = cluster_name,
text_size = 4,
point_size = 0.4,
point_alpha = 0.5
) +
theme_bw() +
labs(
caption = paste0(
stringr::str_to_title(params$cluster_type),
" clustering with nearest neighbors value ",
params$nearest_neighbors
)
) +
guides(col = guide_legend("Cluster assignment", override.aes = list(size = 3))) +
theme(text = element_text(size = 14),
plot.caption = element_text(hjust = 0.5))
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] scater_1.26.0 scuttle_1.8.0
## [3] ggpubr_0.4.0 miQC_1.6.0
## [5] dplyr_1.0.10 SingleCellExperiment_1.20.0
## [7] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [9] GenomicRanges_1.50.0 GenomeInfoDb_1.34.1
## [11] IRanges_2.32.0 S4Vectors_0.36.0
## [13] BiocGenerics_0.44.0 MatrixGenerics_1.10.0
## [15] matrixStats_0.62.0 readr_2.1.3
## [17] ggplot2_3.4.0
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 webshot_0.5.4
## [3] httr_1.4.4 tools_4.2.1
## [5] backports_1.4.1 bslib_0.4.1
## [7] utf8_1.2.2 R6_2.5.1
## [9] irlba_2.3.5.1 vipor_0.4.5
## [11] colorspace_2.0-3 nnet_7.3-18
## [13] withr_2.5.0 gridExtra_2.3
## [15] tidyselect_1.2.0 compiler_4.2.1
## [17] rvest_1.0.3 cli_3.4.1
## [19] BiocNeighbors_1.16.0 xml2_1.3.3
## [21] DelayedArray_0.24.0 labeling_0.4.2
## [23] sass_0.4.2 scales_1.2.1
## [25] systemfonts_1.0.4 stringr_1.4.1
## [27] digest_0.6.30 svglite_2.1.0
## [29] rmarkdown_2.17 XVector_0.38.0
## [31] pkgconfig_2.0.3 htmltools_0.5.3
## [33] sparseMatrixStats_1.10.0 highr_0.9
## [35] fastmap_1.1.0 rlang_1.0.6
## [37] rstudioapi_0.14 DelayedMatrixStats_1.20.0
## [39] farver_2.1.1 jquerylib_0.1.4
## [41] generics_0.1.3 jsonlite_1.8.3
## [43] BiocParallel_1.32.0 car_3.1-1
## [45] RCurl_1.98-1.9 magrittr_2.0.3
## [47] BiocSingular_1.14.0 kableExtra_1.3.4
## [49] modeltools_0.2-23 GenomeInfoDbData_1.2.9
## [51] Matrix_1.5-1 Rcpp_1.0.9
## [53] ggbeeswarm_0.6.0 munsell_0.5.0
## [55] fansi_1.0.3 viridis_0.6.2
## [57] abind_1.4-5 lifecycle_1.0.3
## [59] stringi_1.7.8 yaml_2.3.6
## [61] carData_3.0-5 zlibbioc_1.44.0
## [63] flexmix_2.3-18 grid_4.2.1
## [65] ggrepel_0.9.1 parallel_4.2.1
## [67] lattice_0.20-45 cowplot_1.1.1
## [69] beachmat_2.14.0 splines_4.2.1
## [71] hms_1.1.2 knitr_1.40
## [73] pillar_1.8.1 ggsignif_0.6.4
## [75] codetools_0.2-18 ScaledMatrix_1.6.0
## [77] glue_1.6.2 evaluate_0.17
## [79] renv_0.16.0 vctrs_0.5.0
## [81] tzdb_0.3.0 gtable_0.3.1
## [83] purrr_0.3.5 tidyr_1.2.1
## [85] cachem_1.0.6 xfun_0.34
## [87] rsvd_1.0.5 broom_1.0.1
## [89] rstatix_0.7.0 viridisLite_0.4.1
## [91] tibble_3.1.8 beeswarm_0.4.0
## [93] ellipsis_0.3.2