CmhaDSO python_scRNA_QC

使用例

#@ LOAD
%matplotlib inline

import numpy as np
import pandas as pd
import scipy.io
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
#@ INPUT
# Feature table: the first file column becomes the row index; the remaining
# columns are named "Symbol" and "Label".
i = pd.read_csv("CELLRANGER/features.tsv.gz",sep='\t', index_col=0, names=["Symbol","Label"])
# Barcodes are read without a header row, so the column is labelled 0.
# NOTE(review): assumes the file holds exactly one column -- confirm.
j = pd.read_csv("CELLRANGER/barcodes.tsv.gz", header=None)
# Matrix Market file converted to a dense array (rows follow i, columns follow j).
mm = scipy.io.mmread("CELLRANGER/matrix.mtx.gz").toarray()

# Column labels must be one-dimensional: pass the barcode column itself rather
# than the whole DataFrame `j`, which pandas cannot turn into a flat Index.
df = pd.DataFrame(mm, index=i.index, columns=j[0].values)
# Drop every row whose total over all columns is zero.
df.drop(df.index[df.values.sum(axis=1)==0], inplace=True)

print(df.shape)
df.iloc[:5,:5]
#@ QC Metrics
# Per-column totals of the count matrix.
nUMIs = df.values.sum(axis=0)
# Per-column number of rows holding a value greater than zero.
nGenes = (df.values > 0).sum(axis=0)
# Feature IDs whose symbol starts with 'mt-'; na=False keeps the mask strictly
# boolean when a symbol is missing.
# NOTE(review): the prefix is case-sensitive; lower-case 'mt-' presumably targets
# mouse-style mitochondrial symbols (human references use 'MT-') -- confirm.
Mt = i.index[i["Symbol"].str.startswith('mt-', na=False)]
# `df` can hold fewer rows than `i` (all-zero rows are dropped when `df` is
# built), and .loc raises KeyError for labels that are absent, so keep only the
# IDs that are still present in `df`.
Mt = Mt[Mt.isin(df.index)]
# Percentage of each column's total that comes from the selected rows.
# A column whose total is zero yields NaN here (division by zero).
pMt = 100 * df.loc[Mt, :].values.sum(axis=0) / nUMIs
#@ QC Visualize
# One row of three violin plots, one panel per QC metric.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 3))
sns.violinplot(y=nUMIs, orient='v', ax=ax1).set_title('nUMIs')
sns.violinplot(y=nGenes, orient='v', ax=ax2).set_title('nGenes')
sns.violinplot(y=pMt, orient='v', ax=ax3).set_title('pMt')
# show must actually be called; the bare name `plt.show` only references the
# function and renders nothing when the script runs outside inline mode.
plt.show()
#@ SUBSET
# Filtering thresholds: a column is kept when its nGenes value is at least
# minGenes and its pMt value (a percentage) is at most maxpMt.
minGenes = 200
maxpMt = 5

# Labels of the columns that satisfy both thresholds.
fCells = df.columns[
    np.logical_and(nGenes >= minGenes, pMt <= maxpMt)
]
# Restrict the matrix to the retained columns and report the resulting shape.
df_fCells = df.loc[:, fCells]
df_fCells.shape
#@ WRITE
# Serialise the filtered matrix as a pickle; with pandas' default
# compression="infer", the ".gz" extension selects gzip compression.
pd.to_pickle(df_fCells, "df_fCells.gz")