#!/bin/bash
# ideone boilerplate: run in temp dir
# Create a private scratch directory; give up immediately if that fails.
t=$(mktemp -d -t ideone.XXXXXXXX) || exit
# Remove the scratch directory when the script exits. The trap is on EXIT
# only: an ERR trap would also fire after any failing command mid-script and
# delete the working directory while the script carries on running in it.
trap 'rm -rf -- "$t"' EXIT
# Everything below writes relative paths, so do not continue in the caller's
# directory if the scratch directory cannot be entered.
cd "$t" || exit
# Write the sample annotation to test.txt. The records below are typed with
# ":" standing in for the column separator; tr turns every ":" into a tab.
# The here-document delimiter is quoted, so the body is taken literally.
tr ':' '\t' >test.txt <<'END_OF_FIXTURE'
# start gene 1Chr.g1
1Chr:AUGUSTUS:gene:3636:5916:0.1:+:.:ID=1Chr.g1
1Chr:AUGUSTUS:transcript:3636:5916:0.1:+:.:ID=1Chr.g1.t1;Parent=1Chr.g1
1Chr:AUGUSTUS:transcription_start_site:3636:3636:.:+:.:Parent=1Chr.g1.t1
1Chr:AUGUSTUS:exon:3636:3913:.:+:.:Parent=1Chr.g1.t1
1Chr:AUGUSTUS:start_codon:3760:3762:.:+:0:Parent=1Chr.g1.t1
1Chr:AUGUSTUS:intron:3914:3995:1:+:.:
1Chr:AUGUSTUS:CDS:3760:3913:1:+:0:ID=1Chr.g1.t1.cds;Parent=1Chr.g1.t1
1Chr:AUGUSTUS:stop_codon:5628:5630:.:+:0:Parent=1Chr.g1.t1
1Chr:AUGUSTUS:transcription_end_site:5916:5916:.:+:.:Parent=1Chr.g1.t1
# start gene 1Chr.g2
1Chr:AUGUSTUS:gene:5938:8761:0.17:-:.:ID=1Chr.g2
1Chr:AUGUSTUS:transcript:5938:8761:0.17:-:.:ID=1Chr.g2.t1;Parent=1Chr.g2
1Chr:AUGUSTUS:transcription_end_site:5938:5938:.:-:.:Parent=1Chr.g2.t1
1Chr:AUGUSTUS:exon:5938:6594:.:-:.:Parent=1Chr.g2.t1
1Chr:AUGUSTUS:stop_codon:6428:6430:.:-:0:Parent=1Chr.g2.t1
1Chr:AUGUSTUS:intron:6595:7156:0.8:-:.:Parent=1Chr.g2.t1
1Chr:AUGUSTUS:CDS:6428:6594:0.89:-:2:ID=1Chr.g2.t1.cds;Parent=1Chr.g2.t1
# start gene 2Chr.g1
2Chr:AUGUSTUS:gene:11612:13481:0.09:-:.:ID=2Chr.g1
2Chr:AUGUSTUS:transcript:11612:13481:0.09:-:.:ID=2Chr.g1.t1;Parent=2Chr.g1
2Chr:AUGUSTUS:transcription_end_site:11612:11612:.:-:.:Parent=2Chr.g1.t1
2Chr:AUGUSTUS:exon:11612:13481:.:-:.:Parent=2Chr.g1.t1
2Chr:AUGUSTUS:stop_codon:11864:11866:.:-:0:Parent=2Chr.g1.t1
2Chr:AUGUSTUS:CDS:11864:12940:1:-:0:ID=2Chr.g1.t1.cds;Parent=2Chr.g1.t1
2Chr:AUGUSTUS:start_codon:12938:12940:.:-:0:Parent=2Chr.g1.t1
2Chr:AUGUSTUS:transcription_start_site:13481:13481:.:-:.:Parent=2Chr.g1.t1
# start gene 2Chr g2
2Chr:AUGUSTUS:gene:22876:31223:0.04:+:.:ID=2Chr.g2
2Chr:AUGUSTUS:transcript:22876:31223:0.04:+:.:ID=2Chr.g2.t1;Parent=2Chr.g2
2Chr:AUGUSTUS:transcription_start_site:22876:22876:.:+:.:Parent=2Chr.g2.t1
2Chr:AUGUSTUS:exon:22876:23456:.:+:.:Parent=2Chr.g2.t1
2Chr:AUGUSTUS:exon:23515:24451:.:+:.:Parent=2Chr.g2.t1
2Chr:AUGUSTUS:start_codon:23519:23521:.:+:0:Parent=2Chr.g2.t1
END_OF_FIXTURE
# Contender 1: a single awk pass over test.txt; the result goes to stdout and
# the input file is left untouched. Banners are written to stderr.
printf '%s\n' "*** awk" >&2
time awk -F '\t' '
  # A gene record registers its ID under the next sequential short name.
  $3 == "gene" {
    gene_id = $9
    sub(/^[^=]*=/, "", gene_id)   # strip everything up to and including the first "="
    gsub(/=/, "", gene_id)        # then drop any "=" that is still left
    short_name[gene_id] = "g" (++seen)
  }
  # Every record (the gene line itself included): apply all mappings
  # collected so far, using each ID as a regex, then print the record.
  {
    for (pattern in short_name)
      gsub(pattern, short_name[pattern])
    print
  }
' test.txt
printf '%s\n' "*** sed" >&2
# op FILE
#
# Rename every gene ID in FILE to g1, g2, ... in order of appearance, editing
# FILE in place with one `sed -i` pass per gene.
#
# The IDs come from column 9 (tab-separated) of the records whose third
# whitespace-separated field is "gene": the text after the first "=", with any
# further "=" removed. Each ID is then replaced wherever it occurs in FILE, so
# names built on it (e.g. "<id>.t1") are renamed as well.
#
# Arguments: $1 - path of the file to rewrite (may contain spaces)
# Returns:   0 when there is nothing to rename, otherwise the status of the
#            last sed invocation.
#
# NOTE(review): this looks like the baseline that is timed against
# `refactored`, so the one-pass-per-gene structure is kept on purpose --
# confirm before optimising it.
# NOTE: each ID is used as a sed regex as-is ("." matches any character, and a
# "/" in an ID would break the command).
# NOTE: relies on in-place editing via `sed -i` without a suffix argument.
op () {
  local file=$1
  local ids id
  local counter=0

  # Collect all IDs up front, before the file is modified. Keeping them in a
  # variable avoids dropping a fixed-name scratch file into the current
  # directory.
  ids=$(awk '$3 == "gene"' "$file" | cut -f9 | grep -o "=.*" | sed -e 's/=//g')
  [[ -n $ids ]] || return 0

  while read -r id; do
    counter=$((counter + 1))
    # Call sed directly instead of piping a command string into bash, so
    # neither the ID nor the file name is re-parsed by a shell.
    sed -i "s/$id/g$counter/g" "$file"
  done <<<"$ids"
}
# Snapshot the untouched input first: `op` rewrites test.txt in place, and the
# next contender is run on copy.txt so it starts from identical data.
cp test.txt copy.txt
time op test.txt
# Banner for the next timing run; written to stderr.
printf '%s\n' "*** refactored sed" >&2
# refactored FILE
#
# Rename every gene ID in FILE to g1, g2, ... in order of appearance, editing
# FILE in place. Unlike a pass per gene, all substitutions are emitted as one
# sed script ("s/<id>/g<n>/g" per gene) that a single `sed -i` reads from
# stdin.
#
# The IDs come from column 9 (tab-separated) of the records whose third
# whitespace-separated field is "gene": the text after the first "=", with any
# further "=" removed.
#
# Arguments: $1 - path of the file to rewrite
# Returns:   the status of the final `sed -i`.
#
# NOTE: each ID is used as a sed regex as-is ("." matches any character, and a
# "/" in an ID would break the script).
refactored () {
  local file=$1
  # Function-local, so a caller's own `counter` or `line` is not clobbered.
  local counter=0 line

  awk '$3 == "gene"' "$file" |
    cut -f9 |
    grep -o "=.*" |
    sed -e 's/=//g' |
    while IFS='' read -r line; do
      # Plain assignment rather than ((counter++)): the latter returns status
      # 1 when the old value is 0, which would end the loop early under
      # errexit.
      counter=$((counter + 1))
      printf 's/%s/g%s/g\n' "$line" "$counter"
    done |
    sed -i -f - "$file"
}
# Time the single-script variant on copy.txt, the copy of the input that was
# taken before `op` modified test.txt.
time refactored copy.txt