"""Extract (speaker, utterance) pairs from a timestamped interview transcript."""
import re

# Sample transcript.  Each turn is a "(start - end)" timestamp line followed,
# after a blank line, by a tab-indented "Speaker: text" line.  The literal is
# split across adjacent string literals purely for readability; the resulting
# value is a single string.
s = (
    "Master of the universe\n\n"
    "(Jul 26, 2023 - 1:00pm)\n\n"
    "(Interviewee: Marina)\n\n\n\n"
    "(00:00:05 - 00:00:09)\n\n"
    "\t Alice: This project. Uh my job is to ask lots of questions.\n\n\n\n"
    "(00:00:10 - 00:00:11)\n\n"
    "\t Marina: What is it?\n\n\n\n"
    "(00:00:11 - 00:00:14)\n\n"
    "\t Alice: Uh uh impartially.\n\n\n\n"
    "(00:00:15 - 00:00:18)\n\n"
    "\t Alice: Uh so suddenly I don't work for a particular brand.\n\n\n\n"
    "(00:00:19 - 00:00:21)\n\n"
    "\t Alice: Uh I'm self-employed,\n\n\n\n"
    "(00:00:21 - 00:00:21)\n\n"
    "\t Marina: M M.\n\n\n\n"
    "(00:00:21 - 00:00:32)\n\n"
    "\t Alice: I do group interviews with lots of brands, "
    "from toothpaste to the product we're going to talk about today.\n\n\n\n"
    "(00:00:32 - 00:00:32)\n\n"
    "\t Marina: Okay.\n\n\n\n"
    "(00:00:33 - 00:00:37)\n\n"
    "\t Alice: Uh today we're gonna talk for an hour uh.\n\n\n\n"
    "(00:00:36 - 00:00:36)\n\n"
    "\t Marina: Okay.\n\n\n\n"
    "(00:00:37 - 00:00:39)\n\n"
    "\t "
)

# Pattern anatomy (used with re.M so that ^ anchors at every line start):
#   ^\t          the line must begin with a tab, which skips the header and
#                timestamp lines such as "(Interviewee: Marina)"
#   [^\S\n]*     optional horizontal whitespace (whitespace except newline)
#   ([^:\n]+)    group 1: speaker name, everything up to the first colon
#   :[^\S\n]*    the colon and any horizontal whitespace after it
#   (.+)         group 2: the rest of the line (. does not match newline)
# The trailing "\t " line has no colon, so it yields no match.
pattern = r"^\t[^\S\n]*([^:\n]+):[^\S\n]*(.+)"

# With two capture groups, findall returns a list of (speaker, text) tuples.
matches = re.findall(pattern, s, re.M)
print(matches)
Standard input is empty
[('Alice', 'This project. Uh my job is to ask lots of questions.'), ('Marina', 'What is it?'), ('Alice', 'Uh uh impartially.'), ('Alice', "Uh so suddenly I don't work for a particular brand."), ('Alice', "Uh I'm self-employed,"), ('Marina', 'M M.'), ('Alice', "I do group interviews with lots of brands, from toothpaste to the product we're going to talk about today."), ('Marina', 'Okay.'), ('Alice', "Uh today we're gonna talk for an hour uh."), ('Marina', 'Okay.')]