# Collapse runs of a repeated ASCII letter (e.g. "veeeeery" -> "very") while
# leaving whitelisted words such as "tree" and "tall" untouched.

# Build the PCRE pattern used to squeeze repeated letters.
#
# words2keep: character vector of words that must be left exactly as written.
#             They are treated as literal text, not as regular expressions.
# Returns a single pattern string intended for gsub(..., perl = TRUE); capture
# group 1 holds the single letter that replaces each run.
build_squeeze_pattern <- function(words2keep) {
  repeated_letter <- "([A-Za-z])\\1+"

  # Drop NA / empty entries: an empty alternative would let the skip branch
  # match (as an empty string) at every word boundary and abandon matches
  # that start there.
  words2keep <- words2keep[!is.na(words2keep) & nzchar(words2keep)]
  if (length(words2keep) == 0L) {
    return(repeated_letter)
  }

  # Backslash-escape regex metacharacters so each word matches literally.
  escaped <- gsub(
    "([.\\\\|()\\[\\]{}^$*+?])", "\\\\\\1", words2keep,
    perl = TRUE
  )

  # A protected whole word is matched first and then discarded with
  # (*SKIP)(*F), so the repeated-letter alternative never sees its characters.
  paste0(
    "\\b(?:", paste(escaped, collapse = "|"), ")\\b(*SKIP)(*F)|",
    repeated_letter
  )
}

text <- "This tree is veeeeery tall"
words2keep <- c("tree", "tall")
p <- build_squeeze_pattern(words2keep)
## Unicode-aware alternative: prefix the pattern with (*UCP) and use
## (\\p{L})\\1+ in place of ([A-Za-z])\\1+.
p
gsub(p, "\\1", text, perl = TRUE)
dGV4dCA8LSBjKCJUaGlzIHRyZWUgaXMgdmVlZWVlcnkgdGFsbCIpCndvcmRzMmtlZXAgPC0gYygidHJlZSIsICJ0YWxsIikKcCA8LSBwYXN0ZTAoJ1xcYig/OicscGFzdGUoY29sbGFwc2U9J3wnLHdvcmRzMmtlZXApLCcpXFxiKCpTS0lQKSgqRil8KFtBLVphLXpdKVxcMSsnKQojIyBPUjogcCA8LSBwYXN0ZTAoJygqVUNQKVxcYig/OicscGFzdGUoY29sbGFwc2U9J3wnLHdvcmRzMmtlZXApLCcpXFxiKCpTS0lQKSgqRil8KFxccHtMfSlcXDErJykKcApnc3ViKHAsICdcXDEnLHRleHQsIHBlcmw9VFJVRSk=