import re
from itertools import product
start = "S"
rules = {
"S": ["", "ABR", "ABRCD"],
"R": ["", "BR", "BRC"],
"AB": ["ab"],
"bB": ["Bb"],
"aBb": ["aabb"],
"CD": ["cd"],
"Cc": ["cC"],
"cCd": ["ccdd"],
}
empties = "[SR]"
max_length = 18
# Generate words with the grammar
words = horizon = {start}
heads = '|'.join(rules)
heads = f'(?=({heads}))'
while horizon:
horizon = {
new_word
for word in horizon
for match in re.finditer(heads, word)
for head, start in [[match[1], match.start()]]
for body in rules[head]
for new_word in [word[:start] + body + word[start + len(head):]]
if len(re.sub(empties, '', new_word)) <= max_length
} - words
words |= horizon
generated_words = {word for word in words
if not word or word.islower()}
# Generate words in a simple was as reference
correct_words = {
'a'*i + 'b'*i + 'c'*j + 'd'*j
for i, j in product(range(max_length + 1), repeat=2)
if i + i + j + j <= max_length and i >= j
}
# Show the comparison results
print(f'Correct for words up to length {max_length}:',
generated_words == correct_words)
def sort(words):
return sorted(words, key=lambda word: (len(word), word))
print(f"That's {len(correct_words)} words:", sort(correct_words))
missing = sort(correct_words - generated_words)
invalid = sort(generated_words - correct_words)
print(f'missing {len(missing)} words:', missing)
print(f'invalid {len(invalid)} words:', invalid)
print('Words (including non-terminals)')
print(' encountered by grammar:', len(words))
aW1wb3J0IHJlCmZyb20gaXRlcnRvb2xzIGltcG9ydCBwcm9kdWN0CgpzdGFydCA9ICJTIgpydWxlcyA9IHsKICAgICJTIjogWyIiLCAiQUJSIiwgIkFCUkNEIl0sCiAgICAiUiI6IFsiIiwgIkJSIiwgIkJSQyJdLAogICAgIkFCIjogWyJhYiJdLAogICAgImJCIjogWyJCYiJdLAogICAgImFCYiI6IFsiYWFiYiJdLAogICAgIkNEIjogWyJjZCJdLAogICAgIkNjIjogWyJjQyJdLAogICAgImNDZCI6IFsiY2NkZCJdLAp9CmVtcHRpZXMgPSAiW1NSXSIKCm1heF9sZW5ndGggPSAxOAoKIyBHZW5lcmF0ZSB3b3JkcyB3aXRoIHRoZSBncmFtbWFyCndvcmRzID0gaG9yaXpvbiA9IHtzdGFydH0KaGVhZHMgPSAnfCcuam9pbihydWxlcykKaGVhZHMgPSBmJyg/PSh7aGVhZHN9KSknCndoaWxlIGhvcml6b246CiAgICBob3Jpem9uID0gewogICAgICAgIG5ld193b3JkCiAgICAgICAgZm9yIHdvcmQgaW4gaG9yaXpvbgogICAgICAgIGZvciBtYXRjaCBpbiByZS5maW5kaXRlcihoZWFkcywgd29yZCkKICAgICAgICBmb3IgaGVhZCwgc3RhcnQgaW4gW1ttYXRjaFsxXSwgbWF0Y2guc3RhcnQoKV1dCiAgICAgICAgZm9yIGJvZHkgaW4gcnVsZXNbaGVhZF0KICAgICAgICBmb3IgbmV3X3dvcmQgaW4gW3dvcmRbOnN0YXJ0XSArIGJvZHkgKyB3b3JkW3N0YXJ0ICsgbGVuKGhlYWQpOl1dCiAgICAgICAgaWYgbGVuKHJlLnN1YihlbXB0aWVzLCAnJywgbmV3X3dvcmQpKSA8PSBtYXhfbGVuZ3RoCiAgICB9IC0gd29yZHMKICAgIHdvcmRzIHw9IGhvcml6b24KZ2VuZXJhdGVkX3dvcmRzID0ge3dvcmQgZm9yIHdvcmQgaW4gd29yZHMKICAgICAgICAgICAgICAgICAgIGlmIG5vdCB3b3JkIG9yIHdvcmQuaXNsb3dlcigpfQoKIyBHZW5lcmF0ZSB3b3JkcyBpbiBhIHNpbXBsZSB3YXMgYXMgcmVmZXJlbmNlCmNvcnJlY3Rfd29yZHMgPSB7CiAgICAnYScqaSArICdiJyppICsgJ2MnKmogKyAnZCcqagogICAgZm9yIGksIGogaW4gcHJvZHVjdChyYW5nZShtYXhfbGVuZ3RoICsgMSksIHJlcGVhdD0yKQogICAgaWYgaSArIGkgKyBqICsgaiA8PSBtYXhfbGVuZ3RoIGFuZCBpID49IGoKfQoKIyBTaG93IHRoZSBjb21wYXJpc29uIHJlc3VsdHMKcHJpbnQoZidDb3JyZWN0IGZvciB3b3JkcyB1cCB0byBsZW5ndGgge21heF9sZW5ndGh9OicsCiAgICAgIGdlbmVyYXRlZF93b3JkcyA9PSBjb3JyZWN0X3dvcmRzKQpkZWYgc29ydCh3b3Jkcyk6CiAgICByZXR1cm4gc29ydGVkKHdvcmRzLCBrZXk9bGFtYmRhIHdvcmQ6IChsZW4od29yZCksIHdvcmQpKQpwcmludChmIlRoYXQncyB7bGVuKGNvcnJlY3Rfd29yZHMpfSB3b3JkczoiLCBzb3J0KGNvcnJlY3Rfd29yZHMpKQptaXNzaW5nID0gc29ydChjb3JyZWN0X3dvcmRzIC0gZ2VuZXJhdGVkX3dvcmRzKQppbnZhbGlkID0gc29ydChnZW5lcmF0ZWRfd29yZHMgLSBjb3JyZWN0X3dvcmRzKQpwcmludChmJ21pc3Npbmcge2xlbihtaXNzaW5nKX0gd29yZHM6JywgbWlzc2luZykKcHJpbnQoZidpbnZhbGlkIHtsZW4oaW52YWxpZCl9IHdvcmRzOicsIGludmFsaWQpCnByaW50KCdXb3JkcyAoaW5jbHVkaW5nIG5vbi10ZXJtaW5hbHMpJykKcHJpbnQoJyAgZW5jb3VudGVyZWQgYnkgZ3JhbW1hcjonLCBsZW4od29yZHMpKQ==
Correct for words up to length 18: True
That's 30 words: ['', 'ab', 'aabb', 'abcd', 'aaabbb', 'aabbcd', 'aaaabbbb', 'aaabbbcd', 'aabbccdd', 'aaaaabbbbb', 'aaaabbbbcd', 'aaabbbccdd', 'aaaaaabbbbbb', 'aaaaabbbbbcd', 'aaaabbbbccdd', 'aaabbbcccddd', 'aaaaaaabbbbbbb', 'aaaaaabbbbbbcd', 'aaaaabbbbbccdd', 'aaaabbbbcccddd', 'aaaaaaaabbbbbbbb', 'aaaaaaabbbbbbbcd', 'aaaaaabbbbbbccdd', 'aaaaabbbbbcccddd', 'aaaabbbbccccdddd', 'aaaaaaaaabbbbbbbbb', 'aaaaaaaabbbbbbbbcd', 'aaaaaaabbbbbbbccdd', 'aaaaaabbbbbbcccddd', 'aaaaabbbbbccccdddd']
missing 0 words: []
invalid 0 words: []
Words (including non-terminals)
encountered by grammar: 167124