fork download
  # Extract capitalised personal names of two or three words (for example
  # "Andrew Smith", "Samuel L Jackson", "MIKE NELSON TROUT") from free text,
  # while ignoring longer runs of capitalised words such as Title Case phrases.
  test_string <- "we need a test for Andrew Smith or other names like Samuel L Jackson, but we Don't Want Weird Instances Where more stuff is capitalized, but we do want where the entire name is capitalized, like DEREK JETER or MIKE NELSON TROUT"

  # One capitalised word followed by a group of "whitespace + capitalised word".
  # The trailing group is deliberately left without a quantifier so that a
  # repetition count can be appended when the full pattern is assembled below.
  block <- "\\b\\p{Lu}\\p{L}*(?:\\s+\\p{Lu}\\p{L}*)"

  # Full pattern, with two alternatives tried in order:
  #   1. block{3,}  - a run of four or more capitalised words. (*SKIP)(*F)
  #      forces this alternative to fail and stops the engine from retrying
  #      inside the run, so no part of a long run is reported as a name.
  #   2. block{1,2} - a run of two or three capitalised words: the names.
  # (*UCP) must be the first item in the pattern; it switches \b and \s to
  # Unicode-aware matching, consistent with the \p{Lu} / \p{L} classes.
  # Expanded pattern, as seen by the regex engine:
  #   (*UCP)\b\p{Lu}\p{L}*(?:\s+\p{Lu}\p{L}*){3,}\b(*SKIP)(*F)|
  #   \b\p{Lu}\p{L}*(?:\s+\p{Lu}\p{L}*){1,2}\b
  regex <- paste0(
    "(*UCP)",
    block, "{3,}\\b(*SKIP)(*F)|",
    block, "{1,2}\\b"
  )

  # perl = TRUE is required: \p{..} classes and the (*...) verbs are PCRE-only.
  # The value is a list with one character vector of matches per input string.
  regmatches(test_string, gregexpr(regex, test_string, perl = TRUE))
Success #stdin #stdout 0.19s 185728KB
stdin
Standard input is empty
stdout
[[1]]
[1] "Andrew Smith"      "Samuel L Jackson"  "DEREK JETER"      
[4] "MIKE NELSON TROUT"