fork download
  1. import re
  2. from itertools import product
  3.  
  4. start = "S"
  5. rules = {
  6. "S": ["", "ABR", "ABRCD"],
  7. "R": ["", "BR", "BRC"],
  8. "AB": ["ab"],
  9. "bB": ["Bb"],
  10. "aBb": ["aabb"],
  11. "CD": ["cd"],
  12. "Cc": ["cC"],
  13. "cCd": ["ccdd"],
  14. }
  15. empties = "[SR]"
  16.  
  17. max_length = 18
  18.  
  19. # Generate words with the grammar
  20. words = horizon = {start}
  21. heads = '|'.join(rules)
  22. heads = f'(?=({heads}))'
  23. while horizon:
  24. horizon = {
  25. new_word
  26. for word in horizon
  27. for match in re.finditer(heads, word)
  28. for head, start in [[match[1], match.start()]]
  29. for body in rules[head]
  30. for new_word in [word[:start] + body + word[start + len(head):]]
  31. if len(re.sub(empties, '', new_word)) <= max_length
  32. } - words
  33. words |= horizon
  34. generated_words = {word for word in words
  35. if not word or word.islower()}
  36.  
  37. # Generate words in a simple was as reference
  38. correct_words = {
  39. 'a'*i + 'b'*i + 'c'*j + 'd'*j
  40. for i, j in product(range(max_length + 1), repeat=2)
  41. if i + i + j + j <= max_length and i >= j
  42. }
  43.  
  44. # Show the comparison results
  45. print(f'Correct for words up to length {max_length}:',
  46. generated_words == correct_words)
  47. def sort(words):
  48. return sorted(words, key=lambda word: (len(word), word))
  49. print(f"That's {len(correct_words)} words:", sort(correct_words))
  50. missing = sort(correct_words - generated_words)
  51. invalid = sort(generated_words - correct_words)
  52. print(f'missing {len(missing)} words:', missing)
  53. print(f'invalid {len(invalid)} words:', invalid)
  54. print('Words (including non-terminals)')
  55. print(' encountered by grammar:', len(words))
Success #stdin #stdout 3.49s 34620KB
stdin
Standard input is empty
stdout
Correct for words up to length 18: True
That's 30 words: ['', 'ab', 'aabb', 'abcd', 'aaabbb', 'aabbcd', 'aaaabbbb', 'aaabbbcd', 'aabbccdd', 'aaaaabbbbb', 'aaaabbbbcd', 'aaabbbccdd', 'aaaaaabbbbbb', 'aaaaabbbbbcd', 'aaaabbbbccdd', 'aaabbbcccddd', 'aaaaaaabbbbbbb', 'aaaaaabbbbbbcd', 'aaaaabbbbbccdd', 'aaaabbbbcccddd', 'aaaaaaaabbbbbbbb', 'aaaaaaabbbbbbbcd', 'aaaaaabbbbbbccdd', 'aaaaabbbbbcccddd', 'aaaabbbbccccdddd', 'aaaaaaaaabbbbbbbbb', 'aaaaaaaabbbbbbbbcd', 'aaaaaaabbbbbbbccdd', 'aaaaaabbbbbbcccddd', 'aaaaabbbbbccccdddd']
missing 0 words: []
invalid 0 words: []
Words (including non-terminals)
  encountered by grammar: 167124