use 5.12.1;
use encoding 'utf-8';
$| = 1;
# Inputs for the BOUNDARY pass. The trailing comment on each entry shows the
# output expected once straight double quotes are replaced by « » pairs.
my @test_boundary = ( # EXPECTED RESULT:
'"abc def"', # '«abc def»'
'"abc "d e f" ghi"', # '«abc «d e f» ghi»'
'"abc "d e f""', # '«abc «d e f»»'
'"abc "d e f"', # '«abc "d e f»' (odd number of quotes: the inner one stays)
'"abc "d" "e" f"', # '«abc «d» «e» f»'
# The entries below won't work with \b when locale is enabled.
'"100 Естонiï"', # '«100 Естонiï»'
'"äöõ "ä õ ü" ï"', # '«äöõ «ä õ ü» ï»'
'"äöõ "ä õ ü""', # '«äöõ «ä õ ü»»'
'"äöõ "ä õ ü"', # '«äöõ "ä õ ü»' (odd number of quotes: the inner one stays)
'"äöõ "ä" "õ" ï"', # '«äöõ «ä» «õ» ï»'
);
# Non-ASCII inputs, each followed by the output expected after quote
# conversion. NOTE(review): judging by the name, this list is presumably
# meant for the EMULATE pass (lookbehind instead of \b) - confirm.
my @test_emulate = ( # EXPECTED RESULT:
'"100 Естонiï"', # '«100 Естонiï»'
'"äöõ "ä õ ü" ï"', # '«äöõ «ä õ ü» ï»'
'"äöõ "ä õ ü""', # '«äöõ «ä õ ü»»'
'"äöõ "ä õ ü"', # '«äöõ "ä õ ü»' (odd number of quotes: the inner one stays)
'"äöõ "ä" "õ" ï"', # '«äöõ «ä» «õ» ï»'
);
# BOUNDARY pass: convert straight double quotes to « » using \b to find the
# end of the quoted text, and print one converted line per test sentence.
say "BOUNDARY";
for my $sentence ( @test_boundary ) {
    # Work on a copy: the loop variable aliases the array element, so
    # substituting on it directly would rewrite @test_boundary in place.
    my $text = $sentence;
    my $quote_count = ( $text =~ tr/"// );

    # One global pass can leave quotes behind that only pair up on a later
    # pass (nested quotes), so repeat floor(count / 2) + 1 times.
    for ( 0 .. int( $quote_count / 2 ) ) {
        $text =~ s/
            "(                      # opening quote, start capture
                [\w\.]+?            # at least one word character or dot
                .*?\b[\.,?!»]*?     # anything up to a word boundary, then optional punctuation
            )"                      # end capture, closing quote
        /«$1»/xg;                   # replace the pair with guillemets
    }
    say $text;
}
# EMULATE pass: same conversion as the BOUNDARY pass, but the word boundary
# is emulated with a lookbehind on \p{Word} instead of \b. Prints one
# converted line per test sentence.
say "EMULATE";
# Iterate @test_emulate, whose expected results describe this variant;
# @test_boundary has already been consumed by the BOUNDARY pass.
for my $sentence ( @test_emulate ) {
    # Work on a copy: the loop variable aliases the array element, so
    # substituting on it directly would rewrite the test list in place.
    my $text = $sentence;
    my $quote_count = ( $text =~ tr/"// );

    # One global pass can leave quotes behind that only pair up on a later
    # pass (nested quotes), so repeat floor(count / 2) + 1 times.
    for ( 0 .. int( $quote_count / 2 ) ) {
        $text =~ s/
            "(                      # opening quote, start capture
                [\p{Word}\.]+?      # at least one word character or dot
                .*?(?<=\p{Word})    # anything, ending right after a word character
                [\.,?!»]*?          # optional trailing punctuation
            )"                      # end capture, closing quote
        /«$1»/gx;                   # replace the pair with guillemets
    }
    say $text;
}
CnVzZSA1LjEyLjE7CnVzZSBlbmNvZGluZyAndXRmLTgnOwokfCA9IDE7CgpteSBAdGVzdF9ib3VuZGFyeSA9ICggICMgRVhQRUNURUQgUkVTVUxUOgogICciYWJjIGRlZiInLCAgICAgICAgICMgJ8KrYWJjIGRlZsK7JwogICciYWJjICJkIGUgZiIgZ2hpIicsICMgJ8KrYWJjIMKrZCBlIGbCuyBnaGnCuycKICAnImFiYyAiZCBlIGYiIicsICAgICAjICfCq2FiYyDCq2QgZSBmwrvCuycKICAnImFiYyAiZCBlIGYiJywgICAgICAjICfCq2FiYyAiZCBlIGbCuycKICAnImFiYyAiZCIgImUiIGYiJywgICAjICfCq2FiYyDCq2TCuyDCq2XCuyBmwrsnCiAgIyBiZWxvdyB3b24ndCB3b3JrIHdpdGggXGIgd2hlbiBsb2NhbGUgZW5hYmxlZAogICciMTAwINCV0YHRgtC+0L1pw68iJywgICAgICMgICfCqzEwMCDQldGB0YLQvtC9acOvwrsnCiAgJyLDpMO2w7UgIsOkIMO1IMO8IiDDryInLCAgICMgJ8Krw6TDtsO1IMKrw6Qgw7Ugw7zCuyDDr8K7JwogICciw6TDtsO1ICLDpCDDtSDDvCIiJywgICAgICMgJ8Krw6TDtsO1IMKrw6Qgw7Ugw7zCu8K7JwogICciw6TDtsO1ICLDpCDDtSDDvCInLCAgICAgICMgJ8Krw6TDtsO1IMKrw6Qgw7Ugw7zCuycKICAnIsOkw7bDtSAiw6QiICLDtSIgw68iJywgICAjICfCq8Okw7bDtSDCq8OkwrsgwqvDtcK7IMOvwrsnCik7CgpteSBAdGVzdF9lbXVsYXRlID0gKCAgICMgRVhQRUNURUQgUkVTVUxUOgogICciMTAwINCV0YHRgtC+0L1pw68iJywgICAgICMgJ8KrMTAwINCV0YHRgtC+0L1pw6/CuycKICAnIsOkw7bDtSAiw6Qgw7Ugw7wiIMOvIicsICAgIyAnwqvDpMO2w7UgwqvDpCDDtSDDvMK7IMOvwrsnCiAgJyLDpMO2w7UgIsOkIMO1IMO8IiInLCAgICAgIyAnwqvDpMO2w7UgwqvDpCDDtSDDvMK7wrsnCiAgJyLDpMO2w7UgIsOkIMO1IMO8IicsICAgICAgIyAnwqvDpMO2w7UgIsOkIMO1IMO8wrsnCiAgJyLDpMO2w7UgIsOkIiAiw7UiIMOvIicsICAgIyAnwqvDpMO2w7UgwqvDpMK7IMKrw7XCuyDDr8K7JwopOwoKc2F5ICJCT1VOREFSWSI7CmZvciBteSAkc2VudGVuY2UgKCBAdGVzdF9ib3VuZGFyeSApIHsKICBteSAkcXVvdGVfY291bnQgPSAoICRzZW50ZW5jZSA9fiB0ci8iLyIvICk7CgogIGZvciAoIG15ICRpID0gMCA7ICRpIDw9ICRxdW90ZV9jb3VudCA7ICRpICs9IDIgKSB7CiAgICAkc2VudGVuY2UgPX4gcy8KICAgICAgIiggICAgICAgICAgICAgICAgICAgICAgICAgICMgZmlyc3QgcW91dGUsIHN0YXJ0IGNhcHR1cmUKICAgICAgICBbXHdcLl0rPyAgICAgICAgICAgICMgc3V2YSB3b3JkLWNoYXIKICAgICAgICAuKj9cYltcLiw/IcK7XSo/ICAgICAgICAgICAjIGFueSBjaGFyIGZvbGxvd2VkIGJvdW5kYXJ5ICsgb3B0LiBwdW5jdHVhdGlvbgogICAgICApIiAgICAgICAgICAgICAgICAgICAgICAgICAgIyBzdG9wIGNhcHR1cmUsIGVuZGluZyBxdW90ZQogICAgICAvwqskMcK7L3hnOyAgICAgICAgICAgICAgICAgICAjIGNoYW5nZSB0byBmYW5jeQogIH0KICBzYXkgJHNlbnRlbmNlOwp9CgpzYXkgIkVN
VUxBVEUiOwpmb3IgbXkgJHNlbnRlbmNlICggQHRlc3RfYm91bmRhcnkgKSB7CiAgbXkgJHF1b3RlX2NvdW50ID0gICggJHNlbnRlbmNlID1+IHRyLyIvIi8gKTsKCiAgZm9yICggbXkgJGkgPSAwIDsgJGkgPD0gJHF1b3RlX2NvdW50IDsgJGkgKz0gMiApIHsKICAgICRzZW50ZW5jZSA9fiBzLwogICAgICAiKCAgICAgICAgICAgICAgICAgICAgICAgICAjIGZpcnN0IHFvdXRlLCBzdGFydCBjYXB0dXJlCiAgICAgIFtccHtXb3JkfVwuXSs/ICAgICAgICAgICAgICMgYXQgbGVhc3Qgb25lIHdvcmQtY2hhciBvciBwb2ludAogICAgICAuKj8oPzw9XHB7V29yZH0pICAgICAgICAgICAjIGFueSBjaGFyIGZvbGxvd2VkIGJvdW5kYXJ5IAogICAgICBbXC4sPyHCu10qPyAgICAgICAgICAgICAgICAgIyBvcHRpb25hbCBwdW5jdHVhdGlvbgogICAgICApIiAgICAgICAgICAgICAgICAgICAgICAgICAjIHN0b3AgY2FwdHVyZSwgZW5kaW5nIHF1b3RlCiAgICAgIC/CqyQxwrsvZ3g7ICAgICAgICAgICAgICAgICAgIyBjaGFuZ2UgdG8gZmFuY3kKICB9CiAgc2F5ICRzZW50ZW5jZTsKfQ==