# Demo: filter a block of text down to its "artist" header lines and numbered
# lines, dropping junk lines, using two alternative PCRE patterns.

# Sample input, one element per line of text. The lines are held directly in a
# character vector, so no connection has to be opened or closed.
x <- c(
  "ÁÀDFDS (artist 1)",
  "1 Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
  "AB (artist 2)",
  "2 Nulla sollicitudin elit in purus egestas, in placerat velit iaculis.",
  "BBDDED (artist 3)",
  "az*ù*ù*ù (bad string)",
  "3 Nunc et eros eget turpis sollicitudin mollis id et mi.",
  "4 Mauris condimentum velit eu consequat feugiat.",
  "5 Suspendisse sit amet metus vitae est eleifend tincidunt.",
  "ÉÈDFSF (artist 4)",
  "6 Sed cursus augue in tempus scelerisque.",
  "...gdgdgdg (bad string)",
  "7 in commodo enim in laoreet gravida."
)

# Pattern 1: Unicode properties. A line is kept when it starts with one or
# more number characters (\pN), or when it contains an uppercase letter
# (\p{Lu}) immediately followed by another uppercase letter or an apostrophe.
# The "^" anchor binds to the first alternative only, so the uppercase pair
# may occur anywhere in the line.
unicode_pattern <- "^\\pN+|\\p{Lu}[\\p{Lu}']"
unicode_hits <- x[grepl(unicode_pattern, x, perl = TRUE)]
# Printed explicitly so the result is also shown when the file is source()d.
print(unicode_hits)

# Visual separator between the two result sets.
cat("\n\n", strrep("-", 50), "\n\n")

# Pattern 2: explicit character ranges instead of Unicode properties. A line
# is kept when it starts with one or more digits, or contains two consecutive
# characters from the A-Z / À-ÿ ranges (the second may also be an apostrophe).
# The negative lookahead rejects × Þ ß ÷ þ ø as the first character of the
# pair.
# NOTE(review): À-ÿ also spans lowercase accented letters (à-ö, ù-ý, ÿ), so
# this pattern is broader than pattern 1 -- confirm that is acceptable.
range_pattern <- "^\\d+|(?![×Þß÷þø])[A-ZÀ-ÿ][A-ZÀ-ÿ']"
range_hits <- x[grepl(range_pattern, x, perl = TRUE)]
print(range_hits)