使用例
stringtie sorted.bam -e -G reference.gtf -o out.gtf -A express.txt
# Columns of the gene abundance table (presumably express.txt, the file given to -A):
# 1: Ensembl gene ID
# 2: gene name
# 3: chromosome name
# 4: strand
# 5: start
# 6: end
# 7: coverage
# 8: FPKM
# 9: TPM
DESeq2入力データ
DESeq2入力用データ(リードカウント行列)は，prepDE.pyを利用することで作成できる.
python3 prepDE.py -i list
# list file: tab-separated text giving each sample name and its file_path
name fct
d1 d1.gtf
d2 d2.gtf
d3 d3.gtf
tmp=/tmp/$$
masta=$1
shift
printf '%s\n' "$@" > $tmp
awk 'BEGIN{
FS="\t"
while(getline < "'$tmp'" > 0){ tranfile[++cnt] = $0 }
for(i=1; i<=cnt; i++){
while (getline < tranfile[i] > 0) {
if($9~/ TPM /) { split($9, tmp, " ")
if(tmp[1]!="gene_id" ||
tmp[3]!="transcript_id" ||
tmp[11]!="TPM"){exit 1}
gsub(/^"/, "", tmp[4] )
gsub(/\..*$/,"", tmp[4] )
gsub(/^"/, "", tmp[12])
gsub(/";$/, "", tmp[12])
tran[i, tmp[4]] = tmp[12]
}
}
}
}
$9~/ TPM /{split($9, masta, " ")
if(masta[1]!="gene_id" ||
masta[3]!="transcript_id" ||
masta[11]!="TPM"){exit 1}
gsub(/^"/, "", masta[2] )
gsub(/\..*$/,"", masta[2] )
gsub(/^"/, "", masta[4] )
gsub(/\..*$/,"", masta[4] )
gsub(/^"/, "", masta[12])
gsub(/";$/, "", masta[12])
ck=0
for(i=1; i<=cnt; i++){ ck += (i, masta[4]) in tran }
if(ck==cnt) { printf("%s %s %s ", masta[4], masta[2], masta[12])
for(i=1; i<=cnt; i++){ printf ("%f ", tran[i, masta[4]])}
printf("\n")}
}' $masta || error $? "ERROR"
参考文献