$ conda install -c conda-forge r-remotes
> remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
# Attach the packages used below. library() is used for both so that a
# missing package stops the script immediately instead of returning FALSE.
library(Seurat)
library(DoubletFinder)
以下のデータは prefiltering, scaling, PCA, UMAP 等の解析を済ませたものを使用する。DoubletFinder はサンプルごとに実行する必要があるため、複数サンプルを aggregate したデータ (例: Ctrl データ + 介入データ) は使用しない。
# Load the Seurat object to analyse (expected to be already preprocessed).
so <- readRDS("tmp_a.rds")

# pK identification (case where no ground-truth doublet labels are used).
# Sweep over the parameter grid, summarise the sweep, then compute BCmvn
# for each candidate pK.
so_tmp_1 <- paramSweep_v3(so, PCs = 1:16)
so_tmp_2 <- summarizeSweep(so_tmp_1, GT = FALSE)
bcmvn <- find.pK(so_tmp_2)

# BCmvn ~ mean-variance normalized bimodality coefficient.
# Plot it per pK so the pK with the highest value can be read off the chart.
barplot(bcmvn$BCmetric, names.arg = bcmvn$pK, las = 2)
# Set the expected number of doublets ----
# The pK passed below (0.02) is the pK at the maximum of the BCmvn bar plot.
# Assume 10% of cells are doublets; pick this rate from the 10x Genomics
# table relating the number of cells loaded to the number recovered.
nExp <- round(ncol(so) * 0.10)

# Find doublets ----
# PCs  ~ The number of statistically-significant principal components.
#
# pN   ~ This defines the number of generated artificial doublets,
#        expressed as a proportion of the merged real-artificial data.
#        Default is set to 25%, based on observation that
#        DoubletFinder performance is largely pN-invariant.
#
# pK   ~ This defines the PC neighborhood size used to compute pANN,
#        expressed as a proportion of the merged real-artificial data.
#        No default is set, as pK should be adjusted for each scRNA-seq
#        dataset.
#
# nExp ~ This defines the pANN threshold used to make final
#        doublet/singlet predictions.
so <- doubletFinder_v3(so, pN = 0.25, pK = 0.02, nExp = nExp, PCs = 1:16)
# Locate the DoubletFinder classification column and visualize ----
# The column name is looked up by its "DF.classification" stem rather than
# hard-coded.
DF.name <- grep(
  "DF.classification",
  colnames(so@meta.data),
  fixed = TRUE,
  value = TRUE
)

# The singlet filter below needs exactly one classification column; zero or
# several matches would otherwise fail later with an opaque subsetting error.
if (length(DF.name) != 1L) {
  stop(
    "Expected exactly one 'DF.classification' column in so@meta.data, found ",
    length(DF.name), ".",
    call. = FALSE
  )
}

DimPlot(so, group.by = DF.name, pt.size = 1.2)
VlnPlot(so, features = "nFeature_RNA", group.by = DF.name, pt.size = 1.2)

# Keep only the cells classified as singlets. %in% never yields NA, so a
# missing classification drops the cell instead of breaking the index.
so_filtered <- so[, so@meta.data[[DF.name]] %in% "Singlet"]