# Example data: one row per person with a free-text `message` column.
# `name` and `message` are factors and `age` is an integer vector.
df <- data.frame(
  name = factor(
    c("James", "John", "Jim"),
    levels = c("James", "Jim", "John")
  ),
  age = c(34L, 30L, 27L),
  # Integer codes 1:3 labelled in order, so row i carries the i-th message.
  message = factor(
    1:3,
    labels = c(
      "hello, my name is James. ",
      "hello, my name is John. Here is my favourite website https://stackoverflow.com",
      "Hi! I'm another persoon whose name begins with a J! Here is something that should be filtered out: <filter>"
    )
  )
)

# Remove every whitespace-delimited token that contains "http"/"https" or the
# literal "<filter>", together with the whitespace in front of it.
#   (?:\s+|^)             leading whitespace, or the start of the string
#   \S*                   any non-space characters before the keyword
#   (?<!\w) ... (?!\w)    the keyword must not be glued to a word character
#   (?:https?|<filter>)   the keywords that mark a token for removal
#   \S*                   the rest of the token
# The escapes must be written as "\\s", "\\S" and "\\w": with a space after
# the backslash PCRE reads an escaped literal space and nothing is removed.
# gsub() returns a character vector, so `message` is no longer a factor
# after this assignment.
df$message <- gsub(
  "(?:\\s+|^)\\S*(?<!\\w)(?:https?|<filter>)(?!\\w)\\S*",
  "",
  df$message,
  perl = TRUE
)
df$message
ZGY8LXN0cnVjdHVyZShsaXN0KG5hbWUgPSBzdHJ1Y3R1cmUoYygxTCwgM0wsIDJMKSwgLkxhYmVsID0gYygiSmFtZXMiLCAKIkppbSIsICJKb2huIiksIGNsYXNzID0gImZhY3RvciIpLCBhZ2UgPSBjKDM0TCwgMzBMLCAyN0wpLCBtZXNzYWdlID0gc3RydWN0dXJlKDE6MywgLkxhYmVsID0gYygiaGVsbG8sIG15IG5hbWUgaXMgSmFtZXMuICIsIAoiaGVsbG8sIG15IG5hbWUgaXMgSm9obi4gSGVyZSBpcyBteSBmYXZvdXJpdGUgd2Vic2l0ZSBodHRwczovL3N0YWNrb3ZlcmZsb3cuY29tIiwgCiJIaSEgSSdtIGFub3RoZXIgcGVyc29vbiB3aG9zZSBuYW1lIGJlZ2lucyB3aXRoIGEgSiEgSGVyZSBpcyBzb21ldGhpbmcgdGhhdCBzaG91bGQgYmUgZmlsdGVyZWQgb3V0OiA8ZmlsdGVyPiIKKSwgY2xhc3MgPSAiZmFjdG9yIikpLCAuTmFtZXMgPSBjKCJuYW1lIiwgImFnZSIsICJtZXNzYWdlIiksIGNsYXNzID0gImRhdGEuZnJhbWUiLCByb3cubmFtZXMgPSBjKE5BLCAKLTNMKSkKZGYkbWVzc2FnZSA8LSBnc3ViKCIoPzpcXHMrfF4pXFxTKig/PCFcXHcpKD86aHR0cHM/fDxmaWx0ZXI+KSg/IVxcdylcXFMqIiwgIiIsIGRmJG1lc3NhZ2UsIHBlcmw9VFJVRSkKZGYkbWVzc2FnZQ==