CmhaDSO python_scRNA_NORM_SCALE

使用例

#@ LOAD
%matplotlib inline

import numpy as np
import pandas as pd
import scipy.io
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
#@ INPUT
# Load the pickled table from the working directory. The compression is
# inferred from the ".gz" suffix, which is also pandas' default behaviour.
# NOTE(review): unpickling can execute arbitrary code -- only load a file
# that comes from a trusted source.
df_fCells = pd.read_pickle("df_fCells.gz", compression="infer")
#@ NORMALIZE
# Rescale every column so that it sums to 10000, then apply log(1 + x).
# Presumably rows are genes and columns are cells -- TODO confirm.
# NOTE(review): a column whose total is 0 yields NaN here (0 / 0).
v = np.log1p(
    np.divide(10000 * df_fCells.values, df_fCells.values.sum(axis=0))
)
df_fCells_norm = pd.DataFrame(
    data=v,
    index=df_fCells.index,
    columns=df_fCells.columns,
)
#@ HVGs
# Rank the rows of df_fCells_norm by log1p(variance / mean), computed on the
# back-transformed (expm1) values, and keep the `top` highest-ranked rows.
# Presumably each row is a gene -- TODO confirm.

# Undo the log1p once; the result feeds both the mean and the variance.
expr_linear = np.expm1(df_fCells_norm.values)

mean_expr = expr_linear.mean(axis=1)
mean_expr[mean_expr == 0] = 1e-12  # keep the ratio below free of 0 / 0
log_mean_expr = np.log1p(mean_expr)
stats = pd.DataFrame(log_mean_expr,
                     index=df_fCells_norm.index,
                     columns=['Mean'])

disp = expr_linear.var(axis=1, ddof=1) / mean_expr
del expr_linear  # release the dense temporary; it is not needed below
# Zero dispersion becomes NaN; sort_values places NaN last by default, so
# such rows rank behind every row with a finite dispersion.
disp[disp == 0] = np.nan
disp = np.log1p(disp)
stats['Disp'] = disp

top = 500
genes_disp = stats.sort_values(by=['Disp'], ascending=False)
# `hvg` holds the Mean/Disp statistics of the selected rows.
hvg = genes_disp.head(top)
# The boolean mask keeps the selected rows in their original order.
df_hvg = df_fCells_norm.loc[stats.index.isin(hvg.index), :]
df_hvg.shape
#@ SCALE
# Standardize each row of df_hvg across its columns. StandardScaler works
# column-wise, hence the transpose before fitting and again afterwards.
# Values above 10 are capped at 10; there is no lower bound.
scaler = StandardScaler()
scaled_values = np.clip(
    scaler.fit_transform(df_hvg.values.T),
    a_min=None,
    a_max=10,
)
df_fCells_norm_scal = pd.DataFrame(
    data=scaled_values.T,
    index=df_hvg.index,
    columns=df_hvg.columns,
)