CmhaDSO stringtie

使用例

stringtie sorted.bam -e -G reference.gtf -o out.gtf -A express.txt

  # 1: Ensembl gene ID
  # 2: gene name
  # 3: chromosome name
  # 4: strand
  # 5: start
  # 6: end
  # 7: coverage
  # 8: FPKM
  # 9: TPM
DESeq2入力データ
DESeq2入力用データは prepDE.pyを利用することで作成できる.
python3 prepDE.py -i list

  # listファイル: sample名とfile_pathをタブ区切りテキストで記述
  name	fct
  d1	d1.gtf
  d2	d2.gtf
  d3	d3.gtf
# Merge per-sample StringTie TPM values onto the transcripts of a master GTF.
#
# Usage: <script> master.gtf sample1.gtf [sample2.gtf ...]
#
# For every line of master.gtf whose 9th tab-separated column contains
# " TPM ", one line is written to stdout:
#   transcript_id gene_id master_TPM sample1_TPM sample2_TPM ...
# Fields are separated by a single space, sample TPMs are formatted with
# "%f", and every line keeps a trailing space before the newline.
# Transcripts that are absent from any sample file are skipped.
# IDs are cut at their first "." (e.g. a version suffix is dropped).
#
# The attribute column must carry gene_id, transcript_id and TPM as its
# 1st, 3rd and 11th whitespace-separated tokens; any other layout makes awk
# exit with status 1.
#
# Returns: 0 on success, 2 on wrong usage, awk's exit status otherwise.
merge_tpm() {
  if [ "$#" -lt 2 ]; then
    printf 'usage: %s master.gtf sample.gtf [sample.gtf ...]\n' "${0##*/}" >&2
    return 2
  fi

  # File names are passed as ordinary awk operands and collected from ARGV.
  # This removes the need for a temporary list file (previously a predictable
  # /tmp/$$ path that was never cleaned up) and keeps file names out of the
  # awk program text.
  awk '
  # Remove the leading double quote and the trailing quote+semicolon.
  function unquote(s) {
    sub(/^"/, "", s)
    sub(/";$/, "", s)
    return s
  }

  # Unquote an ID and drop everything from its first "." onwards.
  # Unquoting first also cleans IDs that contain no "." at all.
  function base_id(s) {
    s = unquote(s)
    sub(/\..*$/, "", s)
    return s
  }

  # True when the attribute tokens sit at the positions relied on below.
  function well_formed(tok) {
    return tok[1] == "gene_id" && tok[3] == "transcript_id" && tok[11] == "TPM"
  }

  BEGIN {
    FS = "\t"

    # ARGV[1] is the master file. Every later operand is a sample file; it is
    # remembered and then blanked so awk does not read it as main input too.
    for (i = 2; i < ARGC; i++) {
      sample[++cnt] = ARGV[i]
      ARGV[i] = ""
    }

    # tran[sample index, transcript id] = TPM string of that sample.
    for (i = 1; i <= cnt; i++) {
      while ((rc = (getline < sample[i])) > 0) {
        if ($9 !~ / TPM /) continue
        split($9, tok, " ")
        if (!well_formed(tok)) exit 1
        tran[i, base_id(tok[4])] = unquote(tok[12])
      }
      # getline returns -1 when the file cannot be read; fail loudly instead
      # of silently producing empty output.
      if (rc < 0) {
        printf("cannot read %s\n", sample[i]) > "/dev/stderr"
        exit 1
      }
      close(sample[i])
    }
  }

  $9 ~ / TPM / {
    split($9, tok, " ")
    if (!well_formed(tok)) exit 1

    tid = base_id(tok[4])

    # Only report transcripts that were seen in every sample file.
    for (i = 1; i <= cnt; i++)
      if (!((i, tid) in tran)) next

    printf("%s %s %s ", tid, base_id(tok[2]), unquote(tok[12]))
    for (i = 1; i <= cnt; i++) printf("%f ", tran[i, tid])
    printf("\n")
  }
  ' "$@"
}

# `error` is expected to be provided by the surrounding script.
merge_tpm "$@" || error $? "ERROR"

参考文献