import re
# Two runs of word characters separated by whitespace, e.g. "gilt maximal".
regex_whitespace = r'(\w+)\s+(\w+)\b'
# Compiled once at module level so the loop below does not look it up repeatedly.
pattern = re.compile(regex_whitespace)
labels_ls = ['word <= 0.01', 'word_two <= 0.23', 'word three <= 0.01']


def join_ngram_label(label: str) -> str:
    """Return *label* with its whitespace-separated leading n-gram joined by '_'.

    A label such as ``'gilt maximal <= 0.5'`` becomes ``'gilt_maximal <= 0.5'``.
    Labels that do not start with two or more whitespace-separated words are
    returned unchanged.
    """
    joined = label
    # One substitution pass only joins non-overlapping word pairs, so a
    # three-word label would come out as "a_b c".  Repeat until the label no
    # longer starts with a whitespace-separated pair.  Each pass removes at
    # least one whitespace run, so the loop always terminates.
    while pattern.match(joined):
        joined = pattern.sub(r'\1_\2', joined)
    return joined


# Loop through labels_ls to find any whitespace-separated n-gram labels
# (e.g. "gilt maximal") and replace the whitespace with '_' ("gilt_maximal").
for label in labels_ls:
    new_string = join_ngram_label(label)
    # The helper only alters labels that start with an n-gram; report those.
    if new_string != label:
        print('new string: ', new_string)
aW1wb3J0IHJlCgpyZWdleF93aGl0ZXNwYWNlID0gcicoXHcrKVxzKyhcdyspXGInCnBhdHRlcm4gPSByZS5jb21waWxlKHJlZ2V4X3doaXRlc3BhY2UpICMgdGhpcyBJIGp1c3QgYWRkZWQgYWZ0ZXIgcmV2aWV3aW5nIG90aGVyIHJlbGF0ZWQgcXVlc3Rpb25zCmxhYmVsc19scyA9IFsnd29yZCA8PSAwLjAxJywgJ3dvcmRfdHdvIDw9IDAuMjMnLCAnd29yZCB0aHJlZSA8PSAwLjAxJ10KIyBMb29wIHRocm91Z2ggbGFiZWxzX2xzIHRvIGZpbmQgYW55IG5ncmFtcyB3aGl0ZXNwYWNlIHNlcGFyYXRlZCBsYWJlbHMgKGkuZSBnaWx0IG1heGltYWwpCmZvciBpIGluIGxhYmVsc19sczoKICAgIGlmIHJlLm1hdGNoKHJlZ2V4X3doaXRlc3BhY2UsIGkpOgogICAgICAgICMgcmVwbGFjZSB0aGUgd2hpdGVzcGFjZSB3aXRoIGEgJ18nIHRvIGZvcm0gZ2lsdCptYXhpbWFsCiAgICAgICAgbmV3X3N0cmluZyA9IHJlLnN1YihwYXR0ZXJuLCByJ1wxX1wyJywgaSkKICAgICAgICBwcmludCgnbmV3IHN0cmluZzogJywgbmV3X3N0cmluZyk=