y <- c("1. StÄ…ck is great and awesome", "2. Stack") regmatches(y, regexpr("\\p{L}+(?:\\s+\\p{L}+){0,2}", y, perl=TRUE)) regmatches(y, regexpr("[[:alpha:]]+(?:[[:space:]]+[[:alpha:]]+){0,2}", y))