CmhaDSO alevin fry

使用例

#@ VAR PE_1=A/Aged_S1_L006_L1_001.fastq.gz PE_2=A/Aged_S1_L006_L2_001.fastq.gz genome=~/.GTA/REF/M/CURRENT/g.fa # not gzip compressed gtf=~/.GTA/REF/M/CURRENT/a.gtf # not gzip compressed insert_len=91 flank_trim_len=5 n=$((insert_len - flank_trim_len)) prefix=splici splici=splici_$n salmon_splici=salmon_splici_$n t2g=$splici/${prefix}_fl${n}_t2g_3col.tsv gid2name=gid2name out1=alevin_out out2=af_tmp out3=af_res p=6 #@ INSTALL #conda install -c bioconda salmon alevin-fry pyroe bedtools #conda install -c bioconda scVelo #conda install -c conda-forge leidenalg jupyterlab python-igraph #@ SPLICI pyroe make-splici \ $genome \ $gtf \ $insert_len \ $splici \ --flank-trim-length $flank_trim_len \ --filename-prefix $prefix #@ GID2NAME cat $gtf | awk '$3=="transcript"' | cut -f9 | cut -d';' -f1,4 | tr -d ';"' | awk '$1!~/_PAR_Y$/' | awk '{print $2,$4}' | sort -u > $gid2name #@ SALMON INDEX salmon index \ -t $splici/${prefix}_fl$n.fa \ -i $salmon_splici \ -p $p #@ SALMON ALEVIN salmon alevin \ -i $salmon_splici \ -l IU \ --chromiumV3 \ --sketch \ -1 $PE_1 \ -2 $PE_2 \ -o $out1 #@ ALEVIN FRY alevin-fry generate-permit-list \ -d fw \ -k \ -i $out1 \ -o $out2 alevin-fry collate \ -t $p \ -i $out2 \ -r $out1 alevin-fry quant \ -t $p \ -i $out2 \ -o $out3 \ --tg-map $t2g \ --resolution cr-like \ --use-mtx #@ OUTPUT # $out3/quant.json # $out3/alevin/quants_mat.mtx # $out3/alevin/quants_mat_cols.txt # $out3/alevin/quants_mat_rows.txt #@ ### PYTHON ENV ### import numpy as np import pandas as pd import scanpy as sc import anndata import scvelo as scv import matplotlib import scipy import json import os scv.settings.verbosity = 3 scv.settings.presenter_view = True # set max width size scv.set_figure_params('scvelo') # for beautiful visualization %matplotlib inline #@ frydir = "res" e2n_path = "gid2name" fpath = os.path.sep.join([frydir, "quant.json"]) meta_info = json.load(open(fpath)) ng = meta_info['num_genes'] af_raw = sc.read_mtx(os.path.sep.join([frydir, "alevin", "quants_mat.mtx"])) ng = int(ng/3) e2n = dict([ l.rstrip().split() for l in open(e2n_path).readlines()]) var_names = [ l.rstrip() for l in open(os.path.sep.join([frydir, "alevin", "quants_mat_cols.txt"])).readlines()][:ng] var_names = [e2n[e] for e in var_names] obs_names = [ l.rstrip() for l in open(os.path.sep.join([frydir, "alevin", "quants_mat_rows.txt"])).readlines() ] x = af_raw.X spliced = x[:,range(0,ng)] + x[:,range(2*ng,3*ng)] unspliced = x[:,range(ng, 2*ng)] #@ # create AnnData using spliced and unspliced count matrix adata = anndata.AnnData(X = spliced, layers = dict(spliced = spliced, unspliced = unspliced)) adata.obs_names = obs_names adata.var_names = var_names adata.var_names_make_unique() print(adata) #@ # get embeddings sc.tl.pca(adata) sc.pp.neighbors(adata) sc.tl.umap(adata, n_components=2) #@ # get the proportion of spliced and unspliced count scv.utils.show_proportions(adata) #@ # filter cells and genes, then normalize expression values scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000,enforce=True) #@ # scVelo pipeline scv.pp.moments(adata, n_pcs=30, n_neighbors=30) scv.tl.recover_dynamics(adata, n_jobs = 8) scv.tl.velocity(adata, mode = 'dynamical') scv.tl.velocity_graph(adata, n_jobs=8) scv.tl.latent_time(adata) #@ adata.write("scvelo.h5ad", compression="gzip") #@ adata = sc.read("scvelo.h5ad") #@ adata adata.var_names_make_unique() #@ scv.pl.velocity_embedding_stream(adata, basis='X_umap') #@ scv.pl.scatter(adata, color='latent_time', color_map='gnuplot', size=80, colorbar=True) #@ scv.pl.velocity(adata, var_names=['Ins2', 'Ins1']) #@ scv.pl.scatter(adata, color='latent_time', y=['Ins1','Ins2'],frameon=False, colorbar=True) #@ sc.tl.leiden(adata) # This is needed due to a current bug #adata.uns['neighbors']['distances'] = adata.obsp['distances'] #adata.uns['neighbors']['connectivities'] = adata.obsp['connectivities'] scv.tl.paga(adata, groups='leiden') # NEED TO CHANGE groups to cluster #df = scv.get_df(adata, 'paga/transitions_confidence', precision=2).T #df.style.background_gradient(cmap='Blues').format('{:.2g}') scv.pl.paga(adata, basis='umap', size=50, alpha=.1, min_edge_width=2, node_size_scale=1.5)