#' Clean tweet-like text for downstream text processing
#'
#' Strips URLs, @-mentions, the standalone retweet marker "rt", digits and
#' punctuation (hyphens joining two word characters are kept, e.g.
#' "accident-prone"), then normalises whitespace.
#'
#' The pattern for "rt" is lower-case only, so callers that want to drop
#' "RT" must lower-case the input first.
#'
#' @param x A character vector (anything `gsub()` can coerce to character).
#' @return A character vector the same length as `x` with the cleaned text.
#' @examples
#' clean.text("rt @user an accident-prone site http://example.com")
clean.text <- function(x) {
  # Remove URLs first, while "://" and "." are still present; once
  # punctuation is stripped the URL can no longer be recognised as one token.
  x <- gsub("http\\S+", "", x)
  # Remove @-mentions.
  x <- gsub("@\\w+", "", x)
  # Remove the retweet marker only when it is a standalone token, so words
  # that merely contain "rt" (e.g. "start") are left intact.
  x <- gsub("\\brt\\b", "", x, perl = TRUE)
  # Remove digits before punctuation so that a hyphen left dangling by a
  # removed number ("covid-19" -> "covid-") is cleaned up by the next step.
  x <- gsub("[[:digit:]]+", "", x)
  # Remove punctuation, but skip hyphens that sit between two word
  # characters: (*SKIP)(*F) discards that match and moves past the hyphen.
  x <- gsub("\\b-\\b(*SKIP)(*F)|[[:punct:]]", "", x, perl = TRUE)
  # Replace any remaining character that is not alphanumeric, whitespace,
  # an apostrophe or a hyphen with a space.
  x <- gsub("[^[:alnum:][:space:]'-]", " ", x)
  # Collapse runs of horizontal whitespace to a single space (replacing them
  # with "" would glue neighbouring words together).
  x <- gsub("\\h{2,}", " ", x, perl = TRUE)
  # Trim last, so spaces introduced by the steps above do not survive.
  trimws(x)
}
# Demo: run a sample string (a hyphenated word, a URL and an "rt" marker)
# through clean.text().
my_text <- " accident-prone http://w...content-available-to-author-only...e.com rt "
# The surrounding parentheses make the assigned value visible, so the
# cleaned text is stored in new_text and printed in one step.
(new_text <- clean.text(my_text))
Y2xlYW4udGV4dCA8LSBmdW5jdGlvbih4KQp7CiAgIyByZW1vdmUgcnQKICB4IDwtIGdzdWIoInJ0XFxzIiwgIiIsIHgpCiAgIyByZW1vdmUgYXQKICB4IDwtIGdzdWIoIkBcXHcrIiwgIiIsIHgpCiAgeCA8LSBnc3ViKCJcXGItXFxiKCpTS0lQKSgqRil8W1s6cHVuY3Q6XV0iLCAiIiwgeCwgcGVybD1UUlVFKQogIHggPC0gZ3N1YigiW1s6ZGlnaXQ6XV0rIiwgIiIsIHgpCiAgIyByZW1vdmUgaHR0cAogIHg8LSBnc3ViKCJodHRwXFx3KyIsICIiLCB4KQogIHggPC1nc3ViKCJcXGh7Mix9IiwgIiIsIHgsIHBlcmw9VFJVRSkKICB4PC0gdHJpbXdzKHgpCiAgeDwtIGdzdWIoIlteWzphbG51bTpdWzpzcGFjZTpdJy1dIiwgIiAiLCB4KQogIHJldHVybih4KQp9Cm15X3RleHQgPC0gIiAgYWNjaWRlbnQtcHJvbmUgIGh0dHA6Ly93Li4uY29udGVudC1hdmFpbGFibGUtdG8tYXV0aG9yLW9ubHkuLi5lLmNvbSAgcnQgIgpuZXdfdGV4dCA8LSBjbGVhbi50ZXh0KG15X3RleHQpCm5ld190ZXh0