# Keyword-in-context extraction: for every occurrence of "book(s)" or
# "page(s)", pull out the keyword together with up to five words on each side.
# Context windows of neighbouring keywords may overlap, so the pattern is
# wrapped in a zero-width lookahead and the text is recovered from capture
# group 1 instead of from the (empty) overall match.

# Sample sentence to search.
test_str <- "Made out of wood, a book can contain many pages that are used to transmit information."

# Pattern layout (PCRE, case-insensitive via (?i)):
#   (?=( ... ))  lookahead whose whole content is capture group 1
#   left side    exactly 5 "word + separator" runs, or 0-4 of them when the
#                window is anchored at the start of the string (^)
#   keyword      book / books / page / pages as a whole word
#   right side   exactly 5 "separator + word" runs, or 0-4 of them when the
#                window runs up to the end of the string ($)
my_regex <- "(?i)(?=(\\b(?:(?:\\w+\\W+){5}|^(?:\\w+\\W+){0,4})(?:\\bbooks?|\\bpages?)\\b(?:(?:\\W+\\w+){5}|(?:\\W+\\w+){0,4}$)))"

# perl = TRUE is required both for the lookahead syntax and for the
# "capture.start" / "capture.length" attributes used below.
m <- gregexpr(my_regex, test_str, perl = TRUE)

# Each lookahead hit has match.length 0. Group 1 opens right at the hit
# position, so only the length has to be replaced for regmatches() to return
# the captured window. Column 1 is selected explicitly and stripped to a bare
# integer vector: "capture.length" is a matrix with one column per capture
# group, and assigning the whole matrix would stop working as soon as the
# pattern gained a second group.
m <- lapply(m, function(hit) {
  group_len <- attr(hit, "capture.length")[, 1L]
  attr(hit, "match.length") <- as.vector(group_len)
  hit
})

# One character vector of context windows per input string.
regmatches(test_str, m)
dGVzdF9zdHIgPC0gIk1hZGUgb3V0IG9mIHdvb2QsIGEgYm9vayBjYW4gY29udGFpbiBtYW55IHBhZ2VzIHRoYXQgYXJlIHVzZWQgdG8gdHJhbnNtaXQgaW5mb3JtYXRpb24uIgoKbXlfcmVnZXggPC0gIig/aSkoPz0oXFxiKD86KD86XFx3K1xcVyspezV9fF4oPzpcXHcrXFxXKyl7MCw0fSkoPzpcXGJib29rcz98XFxicGFnZXM/KVxcYig/Oig/OlxcVytcXHcrKXs1fXwoPzpcXFcrXFx3Kyl7MCw0fSQpKSkiCgptIDwtIGdyZWdleHByKG15X3JlZ2V4LCB0ZXN0X3N0ciwgcGVybD1UUlVFKQptIDwtIGxhcHBseShtLCBmdW5jdGlvbihpKSB7CiAgICAgICBhdHRyKGksICJtYXRjaC5sZW5ndGgiKSA8LSBhdHRyKGksICJjYXB0dXJlLmxlbmd0aCIpCiAgICAgICBpCiAgICAgfSkKcmVnbWF0Y2hlcyh0ZXN0X3N0ciwgbSk=