fork download
  1. import re
  2.  
  3. sents = ["Who's kid are you? my ph. is +1-6466461022.Bye!", "Who's kid are you? my ph. is +1-6466461022.'Bye!'"]
  4. for sent in sents:
  5. print( [x for x in re.split(r"([^'\w\s]|'(?![^\W\d_])|(?<![^\W\d_])')|(?='(?<=[^\W\d_]')(?=[^\W\d_]))|\s+", sent) if x] )
  6.  
  7. # => ['Who', "'s", 'kid', 'are', 'you', '?', 'my', 'ph', '.', 'is', '+', '1', '-', '6466461022', '.', 'Bye', '!']
  8. # => ['Who', "'s", 'kid', 'are', 'you', '?', 'my', 'ph', '.', 'is', '+', '1', '-', '6466461022', '.', "'", 'Bye', '!', "'"]
  9.  
  10.  
Success #stdin #stdout 0.03s 9472KB
stdin
Standard input is empty
stdout
['Who', "'s", 'kid', 'are', 'you', '?', 'my', 'ph', '.', 'is', '+', '1', '-', '6466461022', '.', 'Bye', '!']
['Who', "'s", 'kid', 'are', 'you', '?', 'my', 'ph', '.', 'is', '+', '1', '-', '6466461022', '.', "'", 'Bye', '!', "'"]