fork(1) download
  1. import re
  2. def contains_abbrev(abbrev, text):
  3. text = text.lower()
  4. if not abbrev.isupper():
  5. return False
  6. cnt = 0
  7. for c in abbrev.lower():
  8. if text.find(c) > -1:
  9. text = text[text.find(c):]
  10. cnt += 1
  11. continue
  12. return cnt == len(abbrev)
  13.  
  14. text= "Some example text (SET) that demonstrates what I'm looking for. Energy system models (ESM) are used to find specific optima (SCO). Some say computer systems (CUST) are cool. In the summer playing outside (OUTS) should be preferred. Stupid example(s) Stupid example(S) Not stupid example (NSEMPLE), bad example (Bexle)"
  15. abbrev_rx = r'\b(([A-Z])\w*(?:\s+\w+)*?)\s*\((\2[A-Z]*)\)'
  16. print( [x.group() for x in re.finditer(abbrev_rx, text, re.I) if contains_abbrev(x.group(3), x.group(1))] )
  17.  
Success #stdin #stdout 0.02s 9768KB
stdin
Standard input is empty
stdout
['Some example text (SET)', 'Energy system models (ESM)', 'specific optima (SCO)', 'computer systems (CUST)', 'outside (OUTS)', 'Stupid example(S)', 'Not stupid example (NSEMPLE)']