fork download
  1. import re
  2.  
  3. def clean_text(text):
  4. text = str(text).lower()
  5. text = re.sub(r'#(\w+)', r'<\1>', text)
  6. text = re.sub(r'\n', ' ', text) # Remove /n
  7. text = re.sub(r'@[A-Za-z0-9]+', '<user>', text) # Remove and replace @mention
  8. text = re.sub(r'RT\s+', '', text) # Remove RT
  9. text = re.sub(r'https?://\S+\b/?', '<url>', text) # Remove and replace links
  10. return text
  11.  
  12. print(clean_text("@Marcorossi hanno ragione I #novax htt"+"p://www.asfag.com/"))
Success #stdin #stdout 0.03s 9720KB
stdin
Standard input is empty
stdout
<user> hanno ragione i <novax> <url>