# Build a table of conversational turns in which "cos"/"because" occurs
# only after at least four preceding words, together with the 1-based
# word position (Index) of that first "cos"/"because".

# Sample data: one utterance per element.
test <- data.frame(
  Turn = c(
    "Hi. I'm you an' you are me cos",
    "she'd've been so happy cos with all this stuff goin' on",
    "but we're in this together, because y' know things happens",
    "so you can't, cos well, ah because you know why!",
    "not now because it's too late!"
  ),
  stringsAsFactors = FALSE
)

# PCRE pattern made of two alternatives:
#   1. "cos"/"because" reached after at most three words from the start of
#      the string: consume the rest of the string and force a failure with
#      (*SKIP)(*F), so such a turn can never match.
#   2. "cos"/"because" directly preceded by four tokens, each followed by
#      whitespace and/or commas.
# The escapes must be written without embedded spaces ("\\s", not "\\ s"),
# otherwise they denote a literal space followed by a plain letter.
rx <- paste0(
  "^\\s*(?:\\S+\\s+){0,3}(?:cos|because)\\b.*(*SKIP)(*F)",
  "|",
  "(?:\\S+[\\s,]+){4}\\b(cos|because)\\b"
)

# Keep only the matching turns as a plain character vector. Selecting the
# column explicitly avoids relying on single-column data.frame dropping.
Turn <- test$Turn[grepl(rx, test$Turn, perl = TRUE)]

# Split every kept turn at each whole-word "cos"/"because"; the first piece
# is the text that precedes the first occurrence.
# (The name `split` is kept so later code referring to it keeps working.)
split <- strsplit(Turn, "\\b(cos|because)\\b")

# Index = number of whitespace-separated words before the connective, plus
# one. vapply() guarantees a numeric vector even when no turn matched.
Index <- vapply(
  split,
  function(pieces) lengths(strsplit(trimws(pieces[[1]]), "\\s+")) + 1,
  FUN.VALUE = numeric(1)
)

test <- data.frame(Turn, Index, stringsAsFactors = FALSE)
test
dGVzdCA8LSBkYXRhLmZyYW1lKAogIFR1cm4gPSBjKCJIaS4gSSdtIHlvdSBhbicgeW91IGFyZSBtZSBjb3MiLAogICAgICAgICAgInNoZSdkJ3ZlIGJlZW4gc28gaGFwcHkgY29zIHdpdGggYWxsIHRoaXMgc3R1ZmYgZ29pbicgb24iLAogICAgICAgICAgImJ1dCB3ZSdyZSBpbiB0aGlzIHRvZ2V0aGVyLCBiZWNhdXNlIHknIGtub3cgdGhpbmdzIGhhcHBlbnMiLAogICAgICAgICAgInNvIHlvdSBjYW4ndCwgY29zIHdlbGwsIGFoIGJlY2F1c2UgeW91IGtub3cgd2h5ISIsCiAgICAgICAgICAibm90IG5vdyBiZWNhdXNlIGl0J3MgdG9vIGxhdGUhIiksIHN0cmluZ3NBc0ZhY3RvcnMgPSBGKQpyeCA8LSAiXlxccyooPzpcXFMrXFxzKyl7MCwzfSg/OmNvc3xiZWNhdXNlKVxcYi4qKCpTS0lQKSgqRil8KD86XFxTK1tcXHMsXSspezR9XFxiKGNvc3xiZWNhdXNlKVxcYiIKVHVybiA8LSB0ZXN0W2dyZXBsKHJ4LCB0ZXN0JFR1cm4sIHBlcmw9VFJVRSksXQpzcGxpdCA8LSBzdHJzcGxpdChUdXJuLCAiXFxiKGNvc3xiZWNhdXNlKVxcYiIpCkluZGV4IDwtIHNhcHBseShzcGxpdCwgZnVuY3Rpb24oeCkgbGVuZ3RocyhzdHJzcGxpdCh0cmltd3MoeFtbMV1dKSwgIlxccysiKSkrMSkKdGVzdCA8LSBkYXRhLmZyYW1lKFR1cm4sIEluZGV4LCBzdHJpbmdzQXNGYWN0b3JzID0gRikKdGVzdA==