fork download
  1. #!/bin/bash
  2.  
  # ideone boilerplate: run in a private temp dir so the fixture files this
  # script creates never land in the directory it was started from.
  t=$(mktemp -d -t ideone.XXXXXXXX) || exit
  # Clean up on EXIT only. An ERR trap does not terminate the script, so
  # binding the removal to ERR as well would delete the working directory
  # after the first failing command while later commands keep running in it.
  trap 'rm -rf -- "$t"' EXIT
  # Abort if the directory cannot be entered; otherwise everything below
  # would read and write files in the caller's current directory.
  cd "$t" || exit
  7.  
  # Build the test fixture. The here-document below uses ':' as its column
  # separator and tr turns every ':' into a tab, so test.txt ends up as a
  # tab-separated, nine-column annotation table (looks like AUGUSTUS GFF
  # output; column 3 is the feature type, column 9 the ID=/Parent= attributes).
  # The delimiter is quoted (<<\:), so the body is taken literally and ends at
  # the line that consists of a single ':'.
  8. tr : '\t' <<\: >test.txt
  9. # start gene 1Chr.g1
  10. 1Chr:AUGUSTUS:gene:3636:5916:0.1:+:.:ID=1Chr.g1
  11. 1Chr:AUGUSTUS:transcript:3636:5916:0.1:+:.:ID=1Chr.g1.t1;Parent=1Chr.g1
  12. 1Chr:AUGUSTUS:transcription_start_site:3636:3636:.:+:.:Parent=1Chr.g1.t1
  13. 1Chr:AUGUSTUS:exon:3636:3913:.:+:.:Parent=1Chr.g1.t1
  14. 1Chr:AUGUSTUS:start_codon:3760:3762:.:+:0:Parent=1Chr.g1.t1
  15. 1Chr:AUGUSTUS:intron:3914:3995:1:+:.:
  16. 1Chr:AUGUSTUS:CDS:3760:3913:1:+:0:ID=1Chr.g1.t1.cds;Parent=1Chr.g1.t1
  17. 1Chr:AUGUSTUS:stop_codon:5628:5630:.:+:0:Parent=1Chr.g1.t1
  18. 1Chr:AUGUSTUS:transcription_end_site:5916:5916:.:+:.:Parent=1Chr.g1.t1
  19. # start gene 1Chr.g2
  20. 1Chr:AUGUSTUS:gene:5938:8761:0.17:-:.:ID=1Chr.g2
  21. 1Chr:AUGUSTUS:transcript:5938:8761:0.17:-:.:ID=1Chr.g2.t1;Parent=1Chr.g2
  22. 1Chr:AUGUSTUS:transcription_end_site:5938:5938:.:-:.:Parent=1Chr.g2.t1
  23. 1Chr:AUGUSTUS:exon:5938:6594:.:-:.:Parent=1Chr.g2.t1
  24. 1Chr:AUGUSTUS:stop_codon:6428:6430:.:-:0:Parent=1Chr.g2.t1
  25. 1Chr:AUGUSTUS:intron:6595:7156:0.8:-:.:Parent=1Chr.g2.t1
  26. 1Chr:AUGUSTUS:CDS:6428:6594:0.89:-:2:ID=1Chr.g2.t1.cds;Parent=1Chr.g2.t1
  27. # start gene 2Chr.g1
  28. 2Chr:AUGUSTUS:gene:11612:13481:0.09:-:.:ID=2Chr.g1
  29. 2Chr:AUGUSTUS:transcript:11612:13481:0.09:-:.:ID=2Chr.g1.t1;Parent=2Chr.g1
  30. 2Chr:AUGUSTUS:transcription_end_site:11612:11612:.:-:.:Parent=2Chr.g1.t1
  31. 2Chr:AUGUSTUS:exon:11612:13481:.:-:.:Parent=2Chr.g1.t1
  32. 2Chr:AUGUSTUS:stop_codon:11864:11866:.:-:0:Parent=2Chr.g1.t1
  33. 2Chr:AUGUSTUS:CDS:11864:12940:1:-:0:ID=2Chr.g1.t1.cds;Parent=2Chr.g1.t1
  34. 2Chr:AUGUSTUS:start_codon:12938:12940:.:-:0:Parent=2Chr.g1.t1
  35. 2Chr:AUGUSTUS:transcription_start_site:13481:13481:.:-:.:Parent=2Chr.g1.t1
  36. # start gene 2Chr g2
  37. 2Chr:AUGUSTUS:gene:22876:31223:0.04:+:.:ID=2Chr.g2
  38. 2Chr:AUGUSTUS:transcript:22876:31223:0.04:+:.:ID=2Chr.g2.t1;Parent=2Chr.g2
  39. 2Chr:AUGUSTUS:transcription_start_site:22876:22876:.:+:.:Parent=2Chr.g2.t1
  40. 2Chr:AUGUSTUS:exon:22876:23456:.:+:.:Parent=2Chr.g2.t1
  41. 2Chr:AUGUSTUS:exon:23515:24451:.:+:.:Parent=2Chr.g2.t1
  42. 2Chr:AUGUSTUS:start_codon:23519:23521:.:+:0:Parent=2Chr.g2.t1
  43. :
  44.  
  echo "*** awk" >&2
  # Variant 1: a single awk pass over test.txt, printing the result to stdout.
  # Every "gene" row defines a new short name g1, g2, ... for the ID found in
  # column 9; from that row onward each occurrence of the ID on any line is
  # replaced by the short name. The file itself is not modified.
  time awk -F '\t' '
    $3 == "gene" {
      id = $9
      sub(/^[^=]*=/, "", id)      # drop the attribute key ("ID=")
      gsub(/=/, "", id)           # and any further "=" signs
      # The ID is used as a dynamic regex below, so backslash-escape its
      # metacharacters. Otherwise the "." in 1Chr.g1 matches any character
      # and unrelated text such as 1ChrXg1 would be rewritten as well.
      gsub(/[][\\.^$*+?(){}|]/, "\\\\&", id)
      short[id] = "g" ++n
    }
    { for (pat in short) gsub(pat, short[pat]) }
    1
  ' test.txt
  50.  
  echo "*** sed" >&2
  # op FILE
  #
  # Variant 2 (the original, one sed run per gene): rename gene IDs in FILE
  # in place. The IDs are taken from column 9 of every row whose third field
  # is "gene" (text after the first "=", with all "=" removed) and are
  # replaced everywhere in FILE by g1, g2, ... in order of appearance.
  #
  # Arguments: $1 - path of the tab-separated annotation file to rewrite
  # Requires:  bash 4+ (mapfile) and GNU sed (-i)
  op () {
    local -a ids
    local id
    local n=0

    # Collect all IDs before touching the file. The last sed expression
    # backslash-escapes BRE metacharacters and the "/" delimiter so that each
    # ID is matched literally (an unescaped "." would match any character).
    mapfile -t ids < <(
      awk '$3 == "gene"' "$1" |
        cut -f9 |
        grep -o '=.*' |
        sed -e 's/=//g' -e 's|[][\\/.^$*]|\\&|g'
    )

    # Run sed directly with a quoted file name instead of assembling a
    # command string and piping it to bash; no scratch file is needed.
    for id in "${ids[@]}"; do
      n=$((n + 1))
      sed -i -e "s/${id}/g${n}/g" -- "$1"
    done
  }
  61.  
  # op rewrites its argument in place, so keep an untouched copy of the
  # fixture for the "refactored" variant that is timed further down.
  62. cp test.txt copy.txt
  63. time op test.txt
  64.  
  echo "*** refactored sed" >&2
  # refactored FILE
  #
  # Variant 3: same renaming as the per-gene sed variant, but all
  # substitutions are collected into one sed script that is applied to FILE
  # in a single in-place pass.
  #
  # Arguments: $1 - path of the tab-separated annotation file to rewrite
  # Requires:  GNU sed (-i, and "-f -" to read the script from stdin)
  refactored () {
    awk '$3 == "gene"' "$1" |
      cut -f9 |
      grep -o '=.*' |
      # Strip "=" and backslash-escape BRE metacharacters plus the "/"
      # delimiter, so each ID is matched literally by the generated script
      # (an unescaped "." would match any character).
      sed -e 's/=//g' -e 's|[][\\/.^$*]|\\&|g' |
      # One "s/ID/gN/g" command per gene; N is the 1-based input line number.
      awk '{ printf "s/%s/g%d/g\n", $0, NR }' |
      sed -i -f - -- "$1"
  }
  78.  
  # Time the single-pass variant on copy.txt, the copy of the fixture that
  # was taken before op edited test.txt in place.
  79. time refactored copy.txt
  80.  
Success #stdin #stdout #stderr 0.04s 5512KB
stdin
Standard input is empty
stdout
# start gene 1Chr.g1
1Chr	AUGUSTUS	gene	3636	5916	0.1	+	.	ID=g1
1Chr	AUGUSTUS	transcript	3636	5916	0.1	+	.	ID=g1.t1;Parent=g1
1Chr	AUGUSTUS	transcription_start_site	3636	3636	.	+	.	Parent=g1.t1
1Chr	AUGUSTUS	exon	3636	3913	.	+	.	Parent=g1.t1
1Chr	AUGUSTUS	start_codon	3760	3762	.	+	0	Parent=g1.t1
1Chr	AUGUSTUS	intron	3914	3995	1	+	.	
1Chr	AUGUSTUS	CDS	3760	3913	1	+	0	ID=g1.t1.cds;Parent=g1.t1
1Chr	AUGUSTUS	stop_codon	5628	5630	.	+	0	Parent=g1.t1
1Chr	AUGUSTUS	transcription_end_site	5916	5916	.	+	.	Parent=g1.t1
# start gene 1Chr.g2
1Chr	AUGUSTUS	gene	5938	8761	0.17	-	.	ID=g2
1Chr	AUGUSTUS	transcript	5938	8761	0.17	-	.	ID=g2.t1;Parent=g2
1Chr	AUGUSTUS	transcription_end_site	5938	5938	.	-	.	Parent=g2.t1
1Chr	AUGUSTUS	exon	5938	6594	.	-	.	Parent=g2.t1
1Chr	AUGUSTUS	stop_codon	6428	6430	.	-	0	Parent=g2.t1
1Chr	AUGUSTUS	intron	6595	7156	0.8	-	.	Parent=g2.t1
1Chr	AUGUSTUS	CDS	6428	6594	0.89	-	2	ID=g2.t1.cds;Parent=g2.t1
# start gene 2Chr.g1
2Chr	AUGUSTUS	gene	11612	13481	0.09	-	.	ID=g3
2Chr	AUGUSTUS	transcript	11612	13481	0.09	-	.	ID=g3.t1;Parent=g3
2Chr	AUGUSTUS	transcription_end_site	11612	11612	.	-	.	Parent=g3.t1
2Chr	AUGUSTUS	exon	11612	13481	.	-	.	Parent=g3.t1
2Chr	AUGUSTUS	stop_codon	11864	11866	.	-	0	Parent=g3.t1
2Chr	AUGUSTUS	CDS	11864	12940	1	-	0	ID=g3.t1.cds;Parent=g3.t1
2Chr	AUGUSTUS	start_codon	12938	12940	.	-	0	Parent=g3.t1
2Chr	AUGUSTUS	transcription_start_site	13481	13481	.	-	.	Parent=g3.t1
# start gene 2Chr g2
2Chr	AUGUSTUS	gene	22876	31223	0.04	+	.	ID=g4
2Chr	AUGUSTUS	transcript	22876	31223	0.04	+	.	ID=g4.t1;Parent=g4
2Chr	AUGUSTUS	transcription_start_site	22876	22876	.	+	.	Parent=g4.t1
2Chr	AUGUSTUS	exon	22876	23456	.	+	.	Parent=g4.t1
2Chr	AUGUSTUS	exon	23515	24451	.	+	.	Parent=g4.t1
2Chr	AUGUSTUS	start_codon	23519	23521	.	+	0	Parent=g4.t1
stderr
*** awk

real	0m0.030s
user	0m0.003s
sys	0m0.000s
*** sed

real	0m0.051s
user	0m0.014s
sys	0m0.005s
*** refactored sed

real	0m0.006s
user	0m0.004s
sys	0m0.002s