fork download
  1. #!/usr/bin/awk -f
  2. BEGIN {
  3. FS=OFS="\t"
  4. }
  5. {
  6. gsub(/; *$/, "", $9) # trim trailing `;'
  7. split($9, pairs, / *; */) # split attributes into pairs
  8. for (i in pairs) {
  9. split(pairs[i], kv, / */) # split pair into key and value
  10. attr[kv[1]] = kv[2] # add it to `attr'
  11. }
  12. # fill missing fields
  13. if (!("gene_name" in attr))
  14. attr["gene_name"] = attr["gene_id"]
  15. if (!("transcript_id" in attr))
  16. attr["transcript_id"] = attr["gene_id"]
  17. if (!("transcript_name" in attr))
  18. attr["transcript_name"] = attr["transcript_id"];
  19. # recreate the attributes field
  20. attr_all = sep = ""
  21. for (k in attr) {
  22. attr_all = attr_all sep k " " attr[k]
  23. sep = "; "
  24. }
  25. # update the record with new attributes
  26. $9 = attr_all
  27. }
  28. 1 # print record
Success #stdin #stdout 0s 7376KB
stdin
1	ensembl	gene	5273	10061	.	-	.	gene_id ENSGALG00000054818; gene_version 1; gene_source ensembl; gene_biotype protein_coding;
1	ensembl	transcript	5273	10061	.	-	.	gene_id ENSGALG00000054818; gene_version 1; transcript_id ENSGALT00000098984; transcript_version 1; gene_source ensembl; gene_biotype protein_coding; transcript_source ensembl; transcript_biotype protein_coding;
1	ensembl	gene	58427	58617	.	+	.	gene_id ENSGALG00000047594; gene_version 1; gene_name RF00004; gene_source ensembl; gene_biotype snRNA;
1	ensembl	transcript	58427	58617	.	+	.	gene_id ENSGALG00000047594; gene_version 1; transcript_id ENSGALT00000094382; transcript_version 1; gene_name RF00004; gene_source ensembl; gene_biotype snRNA; transcript_name RF00004-201; transcript_source ensembl; transcript_biotype snRNA;
1	ensembl	exon	58427	58617	.	+	.	gene_id ENSGALG00000047594; gene_version 1; transcript_id ENSGALT00000094382; transcript_version 1; exon_number 1; gene_name RF00004; gene_source ensembl; gene_biotype snRNA; transcript_name RF00004-201; transcript_source ensembl; transcript_biotype snRNA; exon_id ENSGALE00000460125; exon_version 1;
1	ensembl	gene	63264	63454	.	+	.	gene_id ENSGALG00000049206; gene_version 1; gene_name RF00004; gene_source ensembl; gene_biotype snRNA;
1	ensembl	transcript	63264	63454	.	+	.	gene_id ENSGALG00000049206; gene_version 1; transcript_id ENSGALT00000092780; transcript_version 1; gene_name RF00004; gene_source ensembl; gene_biotype snRNA; transcript_name RF00004-201; transcript_source ensembl; transcript_biotype snRNA;
1	ensembl	exon	63264	63454	.	+	.	gene_id ENSGALG00000049206; gene_version 1; transcript_id ENSGALT00000092780; transcript_version 1; exon_number 1; gene_name RF00004; gene_source ensembl; gene_biotype snRNA; transcript_name RF00004-201; transcript_source ensembl; transcript_biotype snRNA; exon_id ENSGALE00000501941; exon_version 1;
stdout
1	ensembl	gene	5273	10061	.	-	.	transcript_name ENSGALG00000054818; transcript_id ENSGALG00000054818; gene_name ENSGALG00000054818; gene_id ENSGALG00000054818; gene_source ensembl; gene_biotype protein_coding; gene_version 1
1	ensembl	transcript	5273	10061	.	-	.	transcript_name ENSGALG00000054818; transcript_id ENSGALT00000098984; transcript_source ensembl; gene_name ENSGALG00000054818; transcript_biotype protein_coding; gene_id ENSGALG00000054818; transcript_version 1; gene_source ensembl; gene_biotype protein_coding; gene_version 1
1	ensembl	gene	58427	58617	.	+	.	transcript_name ENSGALG00000054818; transcript_id ENSGALT00000098984; transcript_source ensembl; gene_name RF00004; transcript_biotype protein_coding; gene_id ENSGALG00000047594; transcript_version 1; gene_source ensembl; gene_biotype snRNA; gene_version 1
1	ensembl	transcript	58427	58617	.	+	.	transcript_name RF00004-201; transcript_id ENSGALT00000094382; transcript_source ensembl; gene_name RF00004; transcript_biotype snRNA; gene_id ENSGALG00000047594; transcript_version 1; gene_source ensembl; gene_biotype snRNA; gene_version 1
1	ensembl	exon	58427	58617	.	+	.	transcript_name RF00004-201; transcript_id ENSGALT00000094382; transcript_source ensembl; gene_name RF00004; transcript_biotype snRNA; gene_id ENSGALG00000047594; transcript_version 1; exon_version 1; gene_source ensembl; gene_biotype snRNA; exon_number 1; gene_version 1; exon_id ENSGALE00000460125
1	ensembl	gene	63264	63454	.	+	.	transcript_name RF00004-201; transcript_id ENSGALT00000094382; transcript_source ensembl; gene_name RF00004; transcript_biotype snRNA; gene_id ENSGALG00000049206; transcript_version 1; exon_version 1; gene_source ensembl; gene_biotype snRNA; exon_number 1; gene_version 1; exon_id ENSGALE00000460125
1	ensembl	transcript	63264	63454	.	+	.	transcript_name RF00004-201; transcript_id ENSGALT00000092780; transcript_source ensembl; gene_name RF00004; transcript_biotype snRNA; gene_id ENSGALG00000049206; transcript_version 1; exon_version 1; gene_source ensembl; gene_biotype snRNA; exon_number 1; gene_version 1; exon_id ENSGALE00000460125
1	ensembl	exon	63264	63454	.	+	.	transcript_name RF00004-201; transcript_id ENSGALT00000092780; transcript_source ensembl; gene_name RF00004; transcript_biotype snRNA; gene_id ENSGALG00000049206; transcript_version 1; exon_version 1; gene_source ensembl; gene_biotype snRNA; exon_number 1; gene_version 1; exon_id ENSGALE00000501941