<?php
// Words to be removed from input strings.
// The words are stored as array KEYS (the value 1 is only a placeholder) so
// that membership can be tested with isset() rather than by scanning the
// array. Keys are lower-case; lookups lower-case the candidate word first.
$stopwords = array(
    'der' => 1,
    'die' => 1,
    'das' => 1,
    'the' => 1,
);
/**
 * Remove stopwords from a string.
 *
 * The string is split into words, every word whose lower-cased form is a key
 * of the global $stopwords array is dropped, and the remaining words are
 * joined with single spaces. Punctuation and other separators are therefore
 * not preserved in the result.
 *
 * The original string is returned unchanged when nothing would be left
 * (e.g. "the the") or when the input could not be split. A string consisting
 * of a single word is never filtered, so a lone stopword is kept.
 *
 * @param string $str Input text.
 * @return string Text without stopwords, or $str itself as a fallback.
 */
function strip_stopwords($str = "")
{
    global $stopwords;

    // 1.) Break the string into words.
    // [^-\w\'] matches every character that is NOT one of [0-9a-zA-Z_-'],
    // so runs of such characters act as separators.
    // If the input is unicode/utf-8, the u flag is needed: /pattern/u
    $words = preg_split('/[^-\w\']+/', $str, -1, PREG_SPLIT_NO_EMPTY);
    if ($words === false) {
        // preg_split() signals a PCRE failure with false; keep the input.
        return $str;
    }

    // 2.) Only filter when there are at least 2 words.
    if (count($words) > 1) {
        $words = array_filter($words, function ($w) use ($stopwords) {
            // Keep the word unless its lower-cased form is a stopword key.
            // If utf-8: use mb_strtolower($w, "utf-8") instead.
            return !isset($stopwords[strtolower($w)]);
        });
    }

    // 3.) Make sure not too much was removed: an input such as "the the"
    // would otherwise come back empty, so fall back to the original string.
    if (!empty($words)) {
        return implode(" ", $words);
    }

    return $str;
}
// Test it: run the filter on a sample phrase and print the result.
echo strip_stopwords("The Hobbit das foo, der");