"""Find physical dimensions such as ``5.3 x 2.5 cm`` or ``7 mm`` in free text.

Small building-block patterns (a number, a "by" separator, a unit) are
combined into patterns for one-, two- and three-dimensional measurements,
which are then merged into the single pattern ``m`` / compiled regex ``a``.
"""
import re


def _any_of(*alternatives):
    """Return a non-capturing group that matches any one of *alternatives*."""
    return "(?:" + "|".join(alternatives) + ")"


# Sample input for the demo output printed at the end of the script.
text = '''5.3 x 2.5 cm 11 x 11 mm 7 mm 13 x 12 x 14 mm 13x12cm'''

# A number: ".5", "12", "12.", "12.34", "12345.6" or "123456.".
# The alternation MUST be wrapped in a group: "|" has the lowest precedence
# in a regex, so an ungrouped alternation concatenated into a larger pattern
# splits that whole pattern into unrelated alternatives.
x = _any_of(r"\.\d{1,2}", r"\d{1,4}\.?\d{0,2}", r"\d{5}\.?\d?", r"\d{6}\.?")

# Separator between two numbers: "x" or "by", each side optionally padded
# with a single space.
by = r"(?: )?(?:by|x)(?: )?"

# Unit of length. Longer spellings come first because regex alternation takes
# the first alternative that matches, not the longest one; otherwise
# "millimeter" would win over "millimeters" and leave the "s" unmatched.
cm = _any_of(
    "millimeters", "centimeters", "millimeter", "centimeter", "mm", "cm"
)

# A number followed by its unit, with optional spaces in between so that both
# "7mm" and "7 mm" match.
num_cm = x + " *" + cm

# One dimension: number, "to" or "-", unit (e.g. "5-cm"), or number + unit.
x_cm = _any_of(x + " *(?:to|-) *" + cm, num_cm)

# Two dimensions: a unit on every number, on the last number only, or none.
xy_cm = _any_of(
    num_cm + by + num_cm,
    x + by + num_cm,
    x + by + x,
)

# Three dimensions, with the same three unit placements as above.
xyz_cm = _any_of(
    num_cm + by + num_cm + by + num_cm,
    x + by + x + by + num_cm,
    x + by + x + by + x,
)

# Try the most specific pattern first so "13 x 12 x 14 mm" is reported as one
# three-dimensional match rather than a two-dimensional one plus leftovers.
m = _any_of(xyz_cm, xy_cm, x_cm)

print(m)
a = re.compile(m)
# print() with a single argument behaves the same on Python 2 and Python 3.
print(a.findall(text))