import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
// TODO: add MoviePathMatcher
// TODO: add TvShowPathMatcher
// TODO: add other tvShow
/**
 * Small command line harness that reads file names (one per line) from stdin and prints,
 * step by step, how a movie title and release year are extracted from each of them.
 */
public class TestScraper {

    // matches "[space or punctuation/brackets etc]year", year is group 1
    private static final Pattern YEAR_PATTERN = Pattern.compile("[\\s\\p{Punct}]((?:19|20)\\d{2})(?!\\d)");

    // Matches "1. ", "1) ", "1 - ", "1.-.", "1._"... but not "1.Foo" or "1-Foo" ..
    private static final Pattern LEADING_NUMBERING = Pattern.compile("^(\\d+([.)][\\s\\p{Punct}]+|\\s+\\p{Punct}[\\p{Punct}\\s]*))*");
    // alternative numbering pattern, currently not used by getMatch
    private static final Pattern LEADING_NUMBERING2 = Pattern.compile("^([0-9]+[ _]*[\\-\\.]+[ _]*)*");

    // everything in brackets <[{( .. )}]>, most of the time team names, etc
    private static final Pattern BRACKETS = Pattern.compile("[<({\\[].+?[>)}\\]]");
    // alternative bracket pattern, currently not used by getMatch
    private static final Pattern BRACKETS2 = Pattern.compile("[<\\(\\[\\{].+?[>\\)\\]\\}]");

    // besides the plain ' there are the typographic U+2019 and U+2018 (the latter is actually not an apostrophe)
    private static final char[] ALTERNATE_APOSTROPHES = {'\u2019', '\u2018'};

    // Matches dots in between uppercase letters e.g. in "E.T.", "S.H.I.E.L.D." but not in "a.b.c."
    // The last dot is kept: "a.F.O.O.is.foo" => "a.FOO.is.foo"
    private static final Pattern ACRONYM_DOTS = Pattern.compile("(?<=(\\b|[._])\\p{Lu})[.](?=\\p{Lu}([.]|$))");

    // ( whitespace | punctuation except ' )+, matches dots, spaces, brackets etc
    private static final Pattern MULTI_NON_CHARACTER_PATTERN = Pattern.compile("[\\s\\p{Punct}&&[^']]+");

    /**
     * Reads file names or paths from stdin, one per line, until end of input and prints
     * the matching steps for each of them followed by an empty line.
     *
     * @param args ignored
     * @throws Exception if reading from stdin fails
     */
    public static void main(String[] args) throws Exception {
        // for stdin
        // for file input use: new FileReader("input.txt") instead of the InputStreamReader
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(System.in))) {
            String test;
            while ((test = reader.readLine()) != null) {
                getMatch(getFileNameWithoutExtension(test));
                println();
            }
        }
    }

    /**
     * Matches everything. Tries to strip away all junk, not very reliable.
     * Prints the intermediate value after every step and finally the resulting title and year.
     * <p>
     * Process is as follows (in this order):
     * <ul>
     * <li> Start with filename without extension: "Starship_Troopers_1995.-HDrip--IT"
     * <li> Extract last year if any: "Starship_Troopers_.-HDrip--IT", year "1995"
     * <li> Remove potential starting numbering of collections ("1. ", "1) ", "1 - " ...)
     * <li> Remove anything in brackets
     * <li> Assume from here on that the title is first followed by junk
     * <li> Trim CasE sensitive junk: "Starship_Troopers_.-HDrip-" ("it" could be part of the movie name, "IT" probably not)
     * <li> Unify apostrophes and collapse acronym dots ("S.H.I.E.L.D." =&gt; "SHIELD.")
     * <li> Remove separators: "Starship Troopers HDrip"
     * <li> Trim junk case insensitive: "Starship Troopers"
     * </ul>
     * NOTE(review): the numbering pattern consumes all punctuation that follows "N." or "N)",
     * so for "100. [DVD]Foo" the opening bracket is removed together with the numbering and
     * "DVD]" is no longer recognised as a bracket group.
     *
     * @param input file name without extension, must not be null
     */
    private static void getMatch(String input) {
        // TODO test 3rd party denoise pattern
        // denoise filter Default = @"(([\(\{\[]|\b)((576|720|1080)[pi]|dir(ectors )?cut|dvd([r59]|rip|scr(eener)?)|(avc)?hd|wmv|ntsc|pal|mpeg|dsr|r[1-5]|bd[59]|dts|ac3|blu(-)?ray|[hp]dtv|stv|hddvd|xvid|divx|x264|dxva|(?-i)FEST[Ii]VAL|L[iI]M[iI]TED|[WF]S|PROPER|REPACK|RER[Ii]P|REAL|RETA[Ii]L|EXTENDED|REMASTERED|UNRATED|CHRONO|THEATR[Ii]CAL|DC|SE|UNCUT|[Ii]NTERNAL|[DS]UBBED)([\]\)\}]|\b)(-[^\s]+$)?)")]

        String name = input;
        println("input : " + name);

        // extract the last year from the string
        String year = null;
        Matcher matcher = YEAR_PATTERN.matcher(name);
        int start = 0;
        int stop = 0;
        boolean found = false;
        // iterate over all matches so that start/stop end up describing the last one
        while (matcher.find()) {
            found = true;
            start = matcher.start(1);
            stop = matcher.end(1);
        }
        // take the last match and cut it out of the string, the separator before it is kept
        if (found) {
            year = name.substring(start, stop);
            name = name.substring(0, start) + name.substring(stop);
        }
        println("release year : %s year:%s", name, year);

        // Strip out starting numbering for collections
        name = replaceAll(name, "", LEADING_NUMBERING);
        println("remove numbering : " + name);

        // Strip out everything else in brackets <[{( .. )}]>, most of the time team names, etc
        name = replaceAll(name, "", BRACKETS);
        println("brackets : " + name);

        // strip away known case sensitive garbage
        name = cutOffBeforeFirstMatch(name, GARBAGE_CASESENSITIVE_PATTERNS);
        println("CaSe junk : " + name);

        // The next three steps remove all punctuation characters besides '
        // and also do apostrophe and acronym replacement.

        // replaces alternative apostrophes with a simple '
        name = replaceAllChars(name, ALTERNATE_APOSTROPHES, '\'');
        println("apostrophes : " + name);

        // replaces "S.H.I.E.L.D." with "SHIELD.", only uppercase letters
        name = replaceAll(name, "", ACRONYM_DOTS);
        println("acronyms : " + name);

        // replace all remaining whitespace & punctuation with a single space
        name = replaceAll(name, " ", MULTI_NON_CHARACTER_PATTERN).trim();
        println("separators : " + name);

        // append a " " to aid next step
        // > "Foo bar 1080p AC3 " to find e.g. " AC3 "
        name = name + " ";

        // try to remove more garbage, this time " garbage " syntax
        // method will compare with lowercase name automatically
        name = cutOffBeforeFirstMatch(name, GARBAGE_LOWERCASE);
        println("lowercase junk : " + name);

        name = name.trim();
        println("RESULT : %s year:%s", name, year);
    }

    // Most of the common garbage in movies name we want to strip out
    // (they can be part of the name or correspond to extensions as well).
    // NOTE(review): "dvdscreener " has no leading and " bdrip" no trailing space, unlike all other
    // entries; entries containing punctuation (" hdr10+ ", " dts-x ", " dts-hd.ma ") cannot match
    // in getMatch because punctuation has already been replaced by spaces at that point.
    private static final String[] GARBAGE_LOWERCASE = {
            " dvdrip ", " dvd rip ", "dvdscreener ", " dvdscr ", " dvd scr ",
            " brrip ", " br rip ", " bdrip", " bd rip ", " blu ray ", " bluray ",
            " hddvd ", " hd dvd ", " hdrip ", " hd rip ", " hdlight ", " minibdrip ",
            " webrip ", " web rip ",
            " 720p ", " 1080p ", " 1080i ", " 720 ", " 1080 ", " 480i ", " 2160p ", " 4k ", " 480p ", " 576p ", " 576i ", " 240p ", " 360p ", " 4320p ", " 8k ",
            " hdtv ", " sdtv ", " m hd ", " ultrahd ", " mhd ",
            " h264 ", " x264 ", " aac ", " ac3 ", " ogm ", " dts ", " hevc ", " x265 ", " av1 ",
            " avi ", " mkv ", " xvid ", " divx ", " wmv ", " mpg ", " mpeg ", " flv ", " f4v ",
            " asf ", " vob ", " mp4 ", " mov ",
            " directors cut ", " dircut ", " readnfo ", " read nfo ", " repack ", " rerip ", " multi ", " remastered ",
            " truefrench ", " srt ", " extended cut ",
            " sbs ", " hsbs ", " side by side ", " sidebyside ", /* Side-By-Side 3d stuff */
            " 3d ", " h sbs ", " h tb ", " tb ", " htb ", " top bot ", " topbot ", " top bottom ", " topbottom ", " tab ", " htab ", /* Top-Bottom 3d stuff */
            " anaglyph ", " anaglyphe ", /* Anaglyph 3d stuff */
            " truehd ", " atmos ", " uhd ", " hdr10+ ", " hdr10 ", " hdr ", " dolby ", " dts-x ", " dts-hd.ma ",
            " hfr ",
    };

    // stuff that could be present in real names is matched with tight case sensitive syntax
    // strings here will only match if separated by any of " .-_"
    // entries are inserted into a regex unquoted, so they may contain regex syntax (see "UNRATED[ ._-]DC")
    private static final String[] GARBAGE_CASESENSITIVE = {
            "FRENCH", "TRUEFRENCH", "DUAL", "MULTISUBS", "MULTI", "MULTi", "SUBFORCED", "SUBFORCES", "UNRATED", "UNRATED[ ._-]DC", "EXTENDED", "IMAX",
            "COMPLETE", "PROPER", "iNTERNAL", "INTERNAL",
            "SUBBED", "ANiME", "LIMITED", "REMUX", "DCPRip",
            "TS", "TC", "REAL", "HD", "DDR", "WEB",
            "EN", "ENG", "FR", "ES", "IT", "NL", "VFQ", "VF", "VO", "VOSTFR", "Eng",
            "VOST", "VFF", "VF2", "VFI", "VFSTFR",
    };

    private static final Pattern[] GARBAGE_CASESENSITIVE_PATTERNS = new Pattern[GARBAGE_CASESENSITIVE.length];

    static {
        for (int i = 0; i < GARBAGE_CASESENSITIVE.length; i++) {
            // case sensitive string wrapped in "space or . or _ or -", in the end either separator or end of line
            // end of line is important since .foo.bar. could be stripped to .foo and that would no longer match .foo.
            GARBAGE_CASESENSITIVE_PATTERNS[i] = Pattern.compile("[ ._-]" + GARBAGE_CASESENSITIVE[i] + "(?:[ ._-]|$)");
        }
    }

    // ( whitespace | punctuation), matches dots, spaces, brackets etc
    private static final String NON_CHARACTER = "[\\s\\p{Punct}]";

    // matches "19XX and 20XX" - capture group
    private static final String YEAR_GROUP = "((?:19|20)\\d{2})";

    /**
     * Cuts the input at the start of the first match of each pattern, applying the patterns
     * one after another to the already shortened string. Assumes title is always first.
     *
     * @param input string to shorten
     * @param patterns garbage patterns, tested in array order
     * @return substring from start to first finding of any garbage pattern, "" once nothing is left
     */
    private static String cutOffBeforeFirstMatch(String input, Pattern[] patterns) {
        String remaining = input;
        for (Pattern pattern : patterns) {
            if (remaining.isEmpty()) return "";

            Matcher matcher = pattern.matcher(remaining);
            if (matcher.find()) {
                remaining = remaining.substring(0, matcher.start());
            }
        }
        return remaining;
    }

    /**
     * Cuts the input at the earliest occurrence of any of the given garbage strings.
     * Assumes title is always first.
     *
     * @param input string to shorten, its case is preserved in the result
     * @param garbageStrings lower case strings
     * @return substring from start to first finding of any garbage string,
     *         the unchanged input if none is found
     */
    public static final String cutOffBeforeFirstMatch(String input, String[] garbageStrings) {
        // lower case input to test against lowercase strings
        String inputLowerCased = input.toLowerCase(Locale.US);

        int firstGarbage = input.length();

        for (String garbage : garbageStrings) {
            int garbageIndex = inputLowerCased.indexOf(garbage);
            // if found, shrink to 0..index
            if (garbageIndex > -1 && garbageIndex < firstGarbage)
                firstGarbage = garbageIndex;
        }

        // return substring from input -> keep case
        return input.substring(0, firstGarbage);
    }

    /**
     * Replaces every occurrence of any of the given characters with a single replacement character.
     *
     * @param input string to process
     * @param badChars characters to replace, may be null or empty
     * @param newChar replacement character
     * @return a new string with the replacements applied, or the input instance itself if nothing changed
     */
    public static String replaceAllChars(String input, char[] badChars, char newChar) {
        if (badChars == null || badChars.length == 0)
            return input;
        int inputLength = input.length();
        int replacementLenght = badChars.length;
        boolean modified = false;
        char[] buffer = new char[inputLength];
        input.getChars(0, inputLength, buffer, 0);
        for (int inputIdx = 0; inputIdx < inputLength; inputIdx++) {
            char current = buffer[inputIdx];
            for (int replacementIdx = 0; replacementIdx < replacementLenght; replacementIdx++) {
                if (current == badChars[replacementIdx]) {
                    buffer[inputIdx] = newChar;
                    modified = true;
                    break;
                }
            }
        }
        return modified ? new String(buffer) : input;
    }

    /**
     * Replaces every match of the pattern in the input.
     *
     * @param input string to process
     * @param replacement replacement, interpreted as by {@link Matcher#replaceAll(String)}
     * @param pattern pattern to search for
     * @return the input with all matches replaced
     */
    public static String replaceAll(String input, String replacement, Pattern pattern) {
        return pattern.matcher(input).replaceAll(replacement);
    }

    /** Prints an empty line to stdout. */
    private static void println() {
        System.out.println();
    }

    /** Prints the given string as is to stdout. */
    private static void println(String in) {
        System.out.println(in);
    }

    /** Prints the format string {@code in} filled with {@code args} to stdout. */
    private static void println(String in, Object... args) {
        System.out.println(String.format(in, args));
    }

    /**
     * Strips directory and extension from a path.
     *
     * @param input file name or path, must not be null
     * @return last path segment up to (excluding) its last dot; a name without a dot,
     *         or with its only dot at index 0, is returned unchanged
     */
    private static String getFileNameWithoutExtension(String input) {
        File file = new File(input);
        String name = file.getName();
        if (name != null) {
            int dotPos = name.lastIndexOf('.');
            // dotPos == 0 means a hidden file like ".foo", keep that as is
            if (dotPos > 0) {
                name = name.substring(0, dotPos);
            }
        }
        return name;
    }
}
aW1wb3J0IGphdmEuaW8uQnVmZmVyZWRSZWFkZXI7CmltcG9ydCBqYXZhLmlvLkZpbGU7CmltcG9ydCBqYXZhLmlvLkZpbGVSZWFkZXI7CmltcG9ydCBqYXZhLmlvLklucHV0U3RyZWFtUmVhZGVyOwppbXBvcnQgamF2YS51dGlsLkxvY2FsZTsKaW1wb3J0IGphdmEudXRpbC5yZWdleC5NYXRjaGVyOwppbXBvcnQgamF2YS51dGlsLnJlZ2V4LlBhdHRlcm47CgovLyBUT0RPOiBhZGQgTW92aWVQYXRoTWF0Y2hlcgovLyBUT0RPOiBhZGQgVHZTaG93UGF0aE1hdGNoZXIKLy8gVE9ETzogYWRkIG90aGVyIHR2U2hvdwoKcHVibGljIGNsYXNzIFRlc3RTY3JhcGVyIHsKCiAgICBwdWJsaWMgc3RhdGljIHZvaWQgbWFpbihTdHJpbmdbXSBhcmdzKSB0aHJvd3MgRXhjZXB0aW9uIHsKICAgICAgICAvLyBmb3Igc3RkaW4KICAgICAgICBCdWZmZXJlZFJlYWRlciBpbnB1dEZpbGVSZWFkZXIgPSBuZXcgQnVmZmVyZWRSZWFkZXIobmV3IElucHV0U3RyZWFtUmVhZGVyKFN5c3RlbS5pbikpOwogICAgICAgIC8vIGZvciBmaWxlIGlucHV0CiAgICAgICAgLy8gRmlsZVJlYWRlciBpbnB1dEZpbGVSZWFkZXIgPSBuZXcgRmlsZVJlYWRlcigiaW5wdXQudHh0Iik7CiAgICAgICAgQnVmZmVyZWRSZWFkZXIgcmVhZGVyID0gbmV3IEJ1ZmZlcmVkUmVhZGVyKGlucHV0RmlsZVJlYWRlcik7CiAgICAgICAgU3RyaW5nIHRlc3Q7CiAgICAgICAgd2hpbGUgKCh0ZXN0ID0gcmVhZGVyLnJlYWRMaW5lKCkpICE9IG51bGwpIHsKICAgICAgICAgICAgZ2V0TWF0Y2goZ2V0RmlsZU5hbWVXaXRob3V0RXh0ZW5zaW9uKHRlc3QpKTsKICAgICAgICAgICAgcHJpbnRsbigpOwogICAgICAgIH0KICAgIH0KCiAgICAvKioKICAgICAqIE1hdGNoZXMgZXZlcnl0aGluZy4gVHJpZXMgdG8gc3RyaXAgYXdheSBhbGwganVuaywgbm90IHZlcnkgcmVsaWFibGUuCiAgICAgKiA8cD4KICAgICAqIFByb2Nlc3MgaXMgYXMgZm9sbG93czoKICAgICAqIDx1bD4KICAgICAqIDxsaT4gU3RhcnQgd2l0aCBmaWxlbmFtZSB3aXRob3V0IGV4dGVuc2lvbjogIjEwMC4gW0RWRF1TdGFyc2hpcF9Ucm9vcGVyc18xOTk1Li1IRHJpcC0tSVQiCiAgICAgKiA8bGk+IFJlbW92ZSBwb3RlbnRpYWwgc3RhcnRpbmcgbnVtYmVyaW5nIG9mIGNvbGxlY3Rpb25zICJbRFZEXVN0YXJzaGlwX1Ryb29wZXJzXzE5OTUuLUhEcmlwLS1JVCIKICAgICAqIDxsaT4gRXh0cmFjdCBsYXN0IHllYXIgaWYgYW55OiAiW0RWRF1TdGFyc2hpcF9Ucm9vcGVyc18uLUhEcmlwLS1JVCIKICAgICAqIDxsaT4gUmVtb3ZlIGFueXRoaW5nIGluIGJyYWNrZXRzOiAiU3RhcnNoaXBfVHJvb3BlcnNfLi1IRHJpcC0tSVQiCiAgICAgKiA8bGk+IEFzc3VtZSBmcm9tIGhlcmUgb24gdGhhdCB0aGUgdGl0bGUgaXMgZmlyc3QgZm9sbG93ZWQgYnkganVuawogICAgICogPGxpPiBUcmltIENhc0Ugc2Vuc2l0aXZlIGp1bms6ICJTdGFyc2hpcF9Ucm9vcGVyc18uLUhEcmlwIiAoIml0IiBjb3VsZCBiZSBwYXJ0IG9mIHRoZSBtb3ZpZSBuYW1lLCAiSVQi
IHByb2JhYmx5IG5vdCkKICAgICAqIDxsaT4gUmVtb3ZlIHNlcGFyYXRvcnM6ICJTdGFyc2hpcCBUcm9vcGVycyBIRHJpcCIKICAgICAqIDxsaT4gVHJpbSBqdW5rIGNhc2UgaW5zZW5zaXRpdmU6ICJTdGFyc2hpcCBUcm9vcGVycyIKICAgICAqIDwvdWw+CiAgICAgKi8KICAgIHByaXZhdGUgc3RhdGljIHZvaWQgZ2V0TWF0Y2goU3RyaW5nIGlucHV0KSB7CiAgICAgICAgLy8gVE9ETyB0ZXN0IDNyZCBwYXJ0eSBkZW5vaXNlIHBhdHRlcm4KICAgICAgICAvLyBkZW5vaXNlIGZpbHRlciBEZWZhdWx0ID0gQCIoKFtcKFx7XFtdfFxiKSgoNTc2fDcyMHwxMDgwKVtwaV18ZGlyKGVjdG9ycyApP2N1dHxkdmQoW3I1OV18cmlwfHNjcihlZW5lcik/KXwoYXZjKT9oZHx3bXZ8bnRzY3xwYWx8bXBlZ3xkc3J8clsxLTVdfGJkWzU5XXxkdHN8YWMzfGJsdSgtKT9yYXl8W2hwXWR0dnxzdHZ8aGRkdmR8eHZpZHxkaXZ4fHgyNjR8ZHh2YXwoPy1pKUZFU1RbSWldVkFMfExbaUldTVtpSV1URUR8W1dGXVN8UFJPUEVSfFJFUEFDS3xSRVJbSWldUHxSRUFMfFJFVEFbSWldTHxFWFRFTkRFRHxSRU1BU1RFUkVEfFVOUkFURUR8Q0hST05PfFRIRUFUUltJaV1DQUx8REN8U0V8VU5DVVR8W0lpXU5URVJOQUx8W0RTXVVCQkVEKShbXF1cKVx9XXxcYikoLVteXHNdKyQpPykiKV0KCiAgICAgICAgU3RyaW5nIG5hbWUgPSBpbnB1dDsKICAgICAgICBwcmludGxuKCJpbnB1dCAgICAgICAgICAgIDogIiArIG5hbWUpOwoKICAgICAgICAvLyBleHRyYWN0IHRoZSBsYXN0IHllYXIgZnJvbSB0aGUgc3RyaW5nCiAgICAgICAgU3RyaW5nIHllYXIgPSBudWxsOwogICAgICAgIC8vIG1hdGNoZXMgIltzcGFjZSBvciBwdW5jdHVhdGlvbi9icmFja2V0cyBldGNdeWVhciIsIHllYXIgaXMgZ3JvdXAgMQogICAgICAgIGZpbmFsIFBhdHRlcm4gWUVBUl9QQVRURVJOID0gUGF0dGVybi5jb21waWxlKCJbXFxzXFxwe1B1bmN0fV0oKD86MTl8MjApXFxkezJ9KSg/IVxcZCkiKTsKICAgICAgICBNYXRjaGVyIG1hdGNoZXIgPSBZRUFSX1BBVFRFUk4ubWF0Y2hlcihuYW1lKTsKICAgICAgICBpbnQgc3RhcnQgPSAwOwogICAgICAgIGludCBzdG9wID0gMDsKICAgICAgICBib29sZWFuIGZvdW5kID0gZmFsc2U7CiAgICAgICAgd2hpbGUgKG1hdGNoZXIuZmluZCgpKSB7CiAgICAgICAgICAgIGZvdW5kID0gdHJ1ZTsKICAgICAgICAgICAgc3RhcnQgPSBtYXRjaGVyLnN0YXJ0KDEpOwogICAgICAgICAgICBzdG9wID0gbWF0Y2hlci5lbmQoMSk7CiAgICAgICAgfQogICAgICAgIC8vIGdldCB0aGUgbGFzdCBtYXRjaCBhbmQgZXh0cmFjdCBpdCBmcm9tIHRoZSBzdHJpbmcKICAgICAgICBpZiAoZm91bmQpIHsKICAgICAgICAgICAgeWVhciA9IG5hbWUuc3Vic3RyaW5nKHN0YXJ0LCBzdG9wKTsKICAgICAgICAgICAgbmFtZSA9IG5hbWUuc3Vic3RyaW5nKDAsIHN0YXJ0KSArIG5hbWUuc3Vic3RyaW5nKHN0b3ApOwogICAgICAgIH0KICAgICAgICBwcmludGxuKCJyZWxlYXNlIHllYXIgICAgIDog
JXMgeWVhcjolcyIsIG5hbWUsIHllYXIpOwoKCiAgICAgICAgLy8gU3RyaXAgb3V0IHN0YXJ0aW5nIG51bWJlcmluZyBmb3IgY29sbGVjdGlvbnMKICAgICAgICAvLyBNYXRjaGVzICIxLiAiLCAiMSkgIiwgIjEgLSAiLCAiMS4tLiIsICIxLl8iLi4uIGJ1dCBub3QgIjEuRm9vIiBvciAiMS1Gb28iIC4uCiAgICAgICAgZmluYWwgUGF0dGVybiBMRUFESU5HX05VTUJFUklORyA9IFBhdHRlcm4uY29tcGlsZSgiXihcXGQrKFsuKV1bXFxzXFxwe1B1bmN0fV0rfFxccytcXHB7UHVuY3R9W1xccHtQdW5jdH1cXHNdKikpKiIpOwogICAgICAgIGZpbmFsIFBhdHRlcm4gTEVBRElOR19OVU1CRVJJTkcyID0gUGF0dGVybi5jb21waWxlKCJeKFswLTldK1sgX10qW1xcLVxcLl0rWyBfXSopKiIpOwogICAgICAgIG5hbWUgPSByZXBsYWNlQWxsKG5hbWUsICIiLCBMRUFESU5HX05VTUJFUklORyk7CiAgICAgICAgcHJpbnRsbigicmVtb3ZlIG51bWJlcmluZyA6ICIgKyBuYW1lKTsKCiAgICAgICAgLy8gU3RyaXAgb3V0IGV2ZXJ5dGhpbmcgZWxzZSBpbiBicmFja2V0cyA8W3soIC4uICl9KT4sIG1vc3Qgb2YgdGhlIHRpbWUgdGVhbXMgbmFtZXMsIGV0YwogICAgICAgIGZpbmFsIFBhdHRlcm4gQlJBQ0tFVFMgPSBQYXR0ZXJuLmNvbXBpbGUoIls8KHtcXFtdLis/Wz4pfVxcXV0iKTsKICAgICAgICBmaW5hbCBQYXR0ZXJuIEJSQUNLRVRTMiA9IFBhdHRlcm4uY29tcGlsZSgiWzxcXChcXFtcXHtdLis/Wz5cXClcXF1cXH1dIik7CiAgICAgICAgbmFtZSA9IHJlcGxhY2VBbGwobmFtZSwgIiIsIEJSQUNLRVRTKTsKICAgICAgICBwcmludGxuKCJicmFja2V0cyAgICAgICAgIDogIiArIG5hbWUpOwoKICAgICAgICAvLyBzdHJpcCBhd2F5IGtub3duIGNhc2Ugc2Vuc2l0aXZlIGdhcmJhZ2UKICAgICAgICBuYW1lID0gY3V0T2ZmQmVmb3JlRmlyc3RNYXRjaChuYW1lLCBHQVJCQUdFX0NBU0VTRU5TSVRJVkVfUEFUVEVSTlMpOwogICAgICAgIHByaW50bG4oIkNhU2UganVuayAgICAgICAgOiAiICsgbmFtZSk7CgogICAgICAgIC8vIHJlbW92ZXMgYWxsIHB1bmN0dWF0aW9uIGNoYXJhY3RlcnMgYmVzaWRlcyAnIEFsc28gZG9lcyBhcG9zdHJvcGhlIGFuZCBBY3JvbnltIHJlcGxhY2VtZW50CiAgICAgICAgLy8gcmVwbGFjZSBhbGwgcmVtYWluaW5nIHdoaXRlc3BhY2UgJiBwdW5jdHVhdGlvbiB3aXRoIGEgc2luZ2xlIHNwYWNlCiAgICAgICAgLy8gYnJlYWsgd2hhdCBkb2VzIHJlbW92ZUlubmVyQW5kT3V0dGVyU2VwYXJhdG9ySnVuayhuYW1lKTsKCiAgICAgICAgLy8gLT4gdGhpcyBpcyB1bmlmeUFwb3N0cm9waGVzKG5hbWUpCiAgICAgICAgLy8gcmVwbGFjZXMgYWx0ZXJuYXRpdmUgYXBvc3Ryb3BoZXMgd2l0aCBhIHNpbXBsZSAnCiAgICAgICAgLy8gYmVzaWRlcyB0aGUgcGxhaW4gJyB0aGVyZSBpcyB0aGUgdHlwb2dyYXBoaWMg4oCZIGFuZCDigJggd2hpY2ggaXMgYWN0dWFsbHkgbm90IGFuIGFwb3N0cm9waGUKICAgICAgICBmaW5hbCBjaGFyW10gQUxURVJO
QVRFX0FQT1NUUk9QSEVTID0gbmV3IGNoYXJbXXsn4oCZJywgJ+KAmCd9OwogICAgICAgIG5hbWUgPSByZXBsYWNlQWxsQ2hhcnMobmFtZSwgQUxURVJOQVRFX0FQT1NUUk9QSEVTLCAnXCcnKTsKICAgICAgICBwcmludGxuKCJhcG9zdHJvcGhlcyAgICAgIDogIiArIG5hbWUpOwoKICAgICAgICAvLyAtPiB0aGlzIGlzIHJlcGxhY2VBY3JvbnltcyhuYW1lKQogICAgICAgIC8vIE1hdGNoZXMgZG90cyBpbiBiZXR3ZWVuIFVwcGVyY2FzZSBsZXR0ZXJzIGUuZy4gaW4gIkUuVC4iLCAiUy5ILkkuRS5MLkQuIiBidXQgbm90IGEgImEuYi5jLiIKICAgICAgICAvLyByZXBsYWNlcyAiUy5ILkkuRS5MLkQuIiB3aXRoICJTSElFTEQiLCBvbmx5IHVwcGVyY2FzZSBsZXR0ZXJzCiAgICAgICAgLy8gTGFzdCBkb3QgaXMga2VwdCAiYS5GLk8uTy5pcy5mb28iID0+ICJhLkZPTy5pcy5mb28iCiAgICAgICAgZmluYWwgUGF0dGVybiBBQ1JPTllNX0RPVFMgPSBQYXR0ZXJuLmNvbXBpbGUoIig/PD0oXFxifFsuX10pXFxwe0x1fSlbLl0oPz1cXHB7THV9KFsuXXwkKSkiKTsKICAgICAgICBuYW1lID0gcmVwbGFjZUFsbChuYW1lLCAiIiwgQUNST05ZTV9ET1RTKTsKICAgICAgICBwcmludGxuKCJhY3JvbnltcyAgICAgICAgIDogIiArIG5hbWUpOwoKICAgICAgICAvLyAtPiB0aGlzIGlzIHRoZSBlbmQgb2YgcmVtb3ZlSW5uZXJBbmRPdXR0ZXJTZXBhcmF0b3JKdW5rKG5hbWUpCiAgICAgICAgLy8gKCB3aGl0ZXNwYWNlIHwgcHVuY3R1YXRpb24pKywgbWF0Y2hlcyBkb3RzLCBzcGFjZXMsIGJyYWNrZXRzIGV0YwogICAgICAgIGZpbmFsIFBhdHRlcm4gTVVMVElfTk9OX0NIQVJBQ1RFUl9QQVRURVJOID0gUGF0dGVybi5jb21waWxlKCJbXFxzXFxwe1B1bmN0fSYmW14nXV0rIik7CiAgICAgICAgbmFtZSA9IHJlcGxhY2VBbGwobmFtZSwgIiAiLCBNVUxUSV9OT05fQ0hBUkFDVEVSX1BBVFRFUk4pLnRyaW0oKTsKICAgICAgICBwcmludGxuKCJzZXBhcmF0b3JzICAgICAgIDogIiArIG5hbWUpOwoKICAgICAgICAvLyBhcHBlbmQgYSAiICIgdG8gYWlkIG5leHQgc3RlcAogICAgICAgIC8vID4gIkZvbyBiYXIgMTA4MHAgQUMzICIgdG8gZmluZCBlLmcuICIgQUMzICIKICAgICAgICBuYW1lID0gbmFtZSArICIgIjsKCiAgICAgICAgLy8gdHJ5IHRvIHJlbW92ZSBtb3JlIGdhcmJhZ2UsIHRoaXMgdGltZSAiIGdhcmJhZ2UgIiBzeW50YXgKICAgICAgICAvLyBtZXRob2Qgd2lsbCBjb21wYXJlIHdpdGggbG93ZXJjYXNlIG5hbWUgYXV0b21hdGljYWxseQogICAgICAgIG5hbWUgPSBjdXRPZmZCZWZvcmVGaXJzdE1hdGNoKG5hbWUsIEdBUkJBR0VfTE9XRVJDQVNFKTsKICAgICAgICBwcmludGxuKCJsb3dlcmNhc2UganVuayAgIDogIiArIG5hbWUpOwoKICAgICAgICBuYW1lID0gbmFtZS50cmltKCk7CiAgICAgICAgcHJpbnRsbigiUkVTVUxUICAgICAgICAgICA6ICVzIHllYXI6JXMiLCBuYW1lLCB5ZWFyKTsKICAgIH0KCiAgICAvLyBNb3N0IG9mIHRoZSBjb21tb24g
Z2FyYmFnZSBpbiBtb3ZpZXMgbmFtZSB3ZSB3YW50IHRvIHN0cmlwIG91dAogICAgLy8gKHRoZXkgY2FuIGJlIHBhcnQgb2YgdGhlIG5hbWUgb3IgY29ycmVzcG9uZCB0byBleHRlbnNpb25zIGFzIHdlbGwpLgogICAgcHJpdmF0ZSBzdGF0aWMgZmluYWwgU3RyaW5nW10gR0FSQkFHRV9MT1dFUkNBU0UgPSB7CiAgICAgICAgICAgICIgZHZkcmlwICIsICIgZHZkIHJpcCAiLCAiZHZkc2NyZWVuZXIgIiwgIiBkdmRzY3IgIiwgIiBkdmQgc2NyICIsCiAgICAgICAgICAgICIgYnJyaXAgIiwgIiBiciByaXAgIiwgIiBiZHJpcCIsICIgYmQgcmlwICIsICIgYmx1IHJheSAiLCAiIGJsdXJheSAiLAogICAgICAgICAgICAiIGhkZHZkICIsICIgaGQgZHZkICIsICIgaGRyaXAgIiwgIiBoZCByaXAgIiwgIiBoZGxpZ2h0ICIsICIgbWluaWJkcmlwICIsCiAgICAgICAgICAgICIgd2VicmlwICIsICIgd2ViIHJpcCAiLAogICAgICAgICAgICAiIDcyMHAgIiwgIiAxMDgwcCAiLCAiIDEwODBpICIsICIgNzIwICIsICIgMTA4MCAiLCAiIDQ4MGkgIiwgIiAyMTYwcCAiLCAiIDRrICIsICIgNDgwcCAiLCAiIDU3NnAgIiwgIiA1NzZpICIsICIgMjQwcCAiLCAiIDM2MHAgIiwgIiA0MzIwcCAiLCAiIDhrICIsCiAgICAgICAgICAgICIgaGR0diAiLCAiIHNkdHYgIiwgIiBtIGhkICIsICIgdWx0cmFoZCAiLCAiIG1oZCAiLAogICAgICAgICAgICAiIGgyNjQgIiwgIiB4MjY0ICIsICIgYWFjICIsICIgYWMzICIsICIgb2dtICIsICIgZHRzICIsICIgaGV2YyAiLCAiIHgyNjUgIiwgIiBhdjEgIiwKICAgICAgICAgICAgIiBhdmkgIiwgIiBta3YgIiwgIiB4dmlkICIsICIgZGl2eCAiLCAiIHdtdiAiLCAiIG1wZyAiLCAiIG1wZWcgIiwgIiBmbHYgIiwgIiBmNHYgIiwKICAgICAgICAgICAgIiBhc2YgIiwgIiB2b2IgIiwgIiBtcDQgIiwgIiBtb3YgIiwKICAgICAgICAgICAgIiBkaXJlY3RvcnMgY3V0ICIsICIgZGlyY3V0ICIsICIgcmVhZG5mbyAiLCAiIHJlYWQgbmZvICIsICIgcmVwYWNrICIsICIgcmVyaXAgIiwgIiBtdWx0aSAiLCAiIHJlbWFzdGVyZWQgIiwKICAgICAgICAgICAgIiB0cnVlZnJlbmNoICIsICIgc3J0ICIsICIgZXh0ZW5kZWQgY3V0ICIsCiAgICAgICAgICAgICIgc2JzICIsICIgaHNicyAiLCAiIHNpZGUgYnkgc2lkZSAiLCAiIHNpZGVieXNpZGUgIiwgLyogU2lkZS1CeS1TaWRlIDNkIHN0dWZmICovCiAgICAgICAgICAgICIgM2QgIiwgIiBoIHNicyAiLCAiIGggdGIgIiwgIiB0YiAiLCAiIGh0YiAiLCAiIHRvcCBib3QgIiwgIiB0b3Bib3QgIiwgIiB0b3AgYm90dG9tICIsICIgdG9wYm90dG9tICIsICIgdGFiICIsICIgaHRhYiAiLCAvKiBUb3AtQm90dG9tIDNkIHN0dWZmICovCiAgICAgICAgICAgICIgYW5hZ2x5cGggIiwgIiBhbmFnbHlwaGUgIiwgLyogQW5hZ2x5cGggM2Qgc3R1ZmYgKi8KICAgICAgICAgICAgIiB0cnVlaGQgIiwgIiBhdG1vcyAiLCAiIHVoZCAiLCAiIGhkcjEwKyAiLCAiIGhkcjEwICIsICIgaGRyICIsICIgZG9sYnkg
IiwgIiBkdHMteCAiLCAiIGR0cy1oZC5tYSAiLAogICAgICAgICAgICAiIGhmciAiLAogICAgfTsKICAgIC8vIHN0dWZmIHRoYXQgY291bGQgYmUgcHJlc2VudCBpbiByZWFsIG5hbWVzIGlzIG1hdGNoZWQgd2l0aCB0aWdodCBjYXNlIHNlbnNpdGl2ZSBzeW50YXgKICAgIC8vIHN0cmluZ3MgaGVyZSB3aWxsIG9ubHkgbWF0Y2ggaWYgc2VwYXJhdGVkIGJ5IGFueSBvZiAiIC4tXyIKICAgIHByaXZhdGUgc3RhdGljIGZpbmFsIFN0cmluZ1tdIEdBUkJBR0VfQ0FTRVNFTlNJVElWRSA9IHsKICAgICAgICAgICAgIkZSRU5DSCIsICJUUlVFRlJFTkNIIiwgIkRVQUwiLCAiTVVMVElTVUJTIiwgIk1VTFRJIiwgIk1VTFRpIiwgIlNVQkZPUkNFRCIsICJTVUJGT1JDRVMiLCAiVU5SQVRFRCIsICJVTlJBVEVEWyAuXy1dREMiLCAiRVhURU5ERUQiLCAiSU1BWCIsCiAgICAgICAgICAgICJDT01QTEVURSIsICJQUk9QRVIiLCAiaU5URVJOQUwiLCAiSU5URVJOQUwiLAogICAgICAgICAgICAiU1VCQkVEIiwgIkFOaU1FIiwgIkxJTUlURUQiLCAiUkVNVVgiLCAiRENQUmlwIiwKICAgICAgICAgICAgIlRTIiwgIlRDIiwgIlJFQUwiLCAiSEQiLCAiRERSIiwgIldFQiIsCiAgICAgICAgICAgICJFTiIsICJFTkciLCAiRlIiLCAiRVMiLCAiSVQiLCAiTkwiLCAiVkZRIiwgIlZGIiwgIlZPIiwgIlZPU1RGUiIsICJFbmciLAogICAgICAgICAgICAiVk9TVCIsICJWRkYiLCAiVkYyIiwgIlZGSSIsICJWRlNURlIiLAogICAgfTsKCiAgICBwcml2YXRlIHN0YXRpYyBmaW5hbCBQYXR0ZXJuW10gR0FSQkFHRV9DQVNFU0VOU0lUSVZFX1BBVFRFUk5TID0gbmV3IFBhdHRlcm5bR0FSQkFHRV9DQVNFU0VOU0lUSVZFLmxlbmd0aF07CgogICAgc3RhdGljIHsKICAgICAgICBmb3IgKGludCBpID0gMDsgaSA8IEdBUkJBR0VfQ0FTRVNFTlNJVElWRS5sZW5ndGg7IGkrKykgewogICAgICAgICAgICAvLyBjYXNlIHNlbnNpdGl2ZSBzdHJpbmcgd3JhcHBlZCBpbiAic3BhY2Ugb3IgLiBvciBfIG9yIC0iLCBpbiB0aGUgZW5kIGVpdGhlciBzZXBhcmF0b3Igb3IgZW5kIG9mIGxpbmUKICAgICAgICAgICAgLy8gZW5kIG9mIGxpbmUgaXMgaW1wb3J0YW50IHNpbmNlIC5mb28uYmFyLiBjb3VsZCBiZSBzdHJpcHBlZCB0byAuZm9vIGFuZCB0aGF0IHdvdWxkIG5vIGxvbmdlciBtYXRjaCAuZm9vLgogICAgICAgICAgICBHQVJCQUdFX0NBU0VTRU5TSVRJVkVfUEFUVEVSTlNbaV0gPSBQYXR0ZXJuLmNvbXBpbGUoIlsgLl8tXSIgKyBHQVJCQUdFX0NBU0VTRU5TSVRJVkVbaV0gKyAiKD86WyAuXy1dfCQpIik7CiAgICAgICAgfQogICAgfQoKICAgIC8vICggd2hpdGVzcGFjZSB8IHB1bmN0dWF0aW9uKSwgbWF0Y2hlcyBkb3RzLCBzcGFjZXMsIGJyYWNrZXRzIGV0YwogICAgcHJpdmF0ZSBzdGF0aWMgZmluYWwgU3RyaW5nIE5PTl9DSEFSQUNURVIgPSAiW1xcc1xccHtQdW5jdH1dIjsKCiAgICAvLyBtYXRjaGVzICIxOVhYIGFuZCAyMFhYIiAtIGNhcHR1cmUgZ3JvdXAKICAgIHByaXZhdGUg
c3RhdGljIGZpbmFsIFN0cmluZyBZRUFSX0dST1VQID0gIigoPzoxOXwyMClcXGR7Mn0pIjsKCiAgICAvKioKICAgICAqIGFzc3VtZXMgdGl0bGUgaXMgYWx3YXlzIGZpcnN0CiAgICAgKiBAcmV0dXJuIHN1YnN0cmluZyBmcm9tIHN0YXJ0IHRvIGZpcnN0IGZpbmRpbmcgb2YgYW55IGdhcmJhZ2UgcGF0dGVybgogICAgICovCiAgICBwcml2YXRlIHN0YXRpYyBTdHJpbmcgY3V0T2ZmQmVmb3JlRmlyc3RNYXRjaChTdHJpbmcgaW5wdXQsIFBhdHRlcm5bXSBwYXR0ZXJucykgewogICAgICAgIFN0cmluZyByZW1haW5pbmcgPSBpbnB1dDsKICAgICAgICBmb3IgKFBhdHRlcm4gcGF0dGVybiA6IHBhdHRlcm5zKSB7CiAgICAgICAgICAgIGlmIChyZW1haW5pbmcuaXNFbXB0eSgpKSByZXR1cm4gIiI7CgogICAgICAgICAgICBNYXRjaGVyIG1hdGNoZXIgPSBwYXR0ZXJuLm1hdGNoZXIocmVtYWluaW5nKTsKICAgICAgICAgICAgaWYgKG1hdGNoZXIuZmluZCgpKSB7CiAgICAgICAgICAgICAgICByZW1haW5pbmcgPSByZW1haW5pbmcuc3Vic3RyaW5nKDAsIG1hdGNoZXIuc3RhcnQoKSk7CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICAgICAgcmV0dXJuIHJlbWFpbmluZzsKICAgIH0KCiAgICAvKioKICAgICAqIGFzc3VtZXMgdGl0bGUgaXMgYWx3YXlzIGZpcnN0CiAgICAgKiBAcGFyYW0gZ2FyYmFnZVN0cmluZ3MgbG93ZXIgY2FzZSBzdHJpbmdzCiAgICAgKiBAcmV0dXJuIHN1YnN0cmluZyBmcm9tIHN0YXJ0IHRvIGZpcnN0IGZpbmRpbmcgb2YgYW55IGdhcmJhZ2Ugc3RyaW5nCiAgICAgKi8KICAgIHB1YmxpYyBzdGF0aWMgZmluYWwgU3RyaW5nIGN1dE9mZkJlZm9yZUZpcnN0TWF0Y2goU3RyaW5nIGlucHV0LCBTdHJpbmdbXSBnYXJiYWdlU3RyaW5ncykgewogICAgICAgIC8vIGxvd2VyIGNhc2UgaW5wdXQgdG8gdGVzdCBhZ2FpbnN0IGxvd2VyY2FzZSBzdHJpbmdzCiAgICAgICAgU3RyaW5nIGlucHV0TG93ZXJDYXNlZCA9IGlucHV0LnRvTG93ZXJDYXNlKExvY2FsZS5VUyk7CgogICAgICAgIGludCBmaXJzdEdhcmJhZ2UgPSBpbnB1dC5sZW5ndGgoKTsKCiAgICAgICAgZm9yIChTdHJpbmcgZ2FyYmFnZSA6IGdhcmJhZ2VTdHJpbmdzKSB7CiAgICAgICAgICAgIGludCBnYXJiYWdlSW5kZXggPSBpbnB1dExvd2VyQ2FzZWQuaW5kZXhPZihnYXJiYWdlKTsKICAgICAgICAgICAgLy8gaWYgZm91bmQsIHNocmluayB0byAwLi5pbmRleAogICAgICAgICAgICBpZiAoZ2FyYmFnZUluZGV4ID4gLTEgJiYgZ2FyYmFnZUluZGV4IDwgZmlyc3RHYXJiYWdlKQogICAgICAgICAgICAgICAgZmlyc3RHYXJiYWdlID0gZ2FyYmFnZUluZGV4OwogICAgICAgIH0KCiAgICAgICAgLy8gcmV0dXJuIHN1YnN0cmluZyBmcm9tIGlucHV0IC0+IGtlZXAgY2FzZQogICAgICAgIHJldHVybiBpbnB1dC5zdWJzdHJpbmcoMCwgZmlyc3RHYXJiYWdlKTsKICAgIH0KCgogICAgcHVibGljIHN0YXRpYyBTdHJpbmcgcmVwbGFjZUFsbENoYXJzKFN0cmluZyBpbnB1dCwgY2hh
cltdIGJhZENoYXJzLCBjaGFyIG5ld0NoYXIpIHsKICAgICAgICBpZiAoYmFkQ2hhcnMgPT0gbnVsbCB8fCBiYWRDaGFycy5sZW5ndGggPT0gMCkKICAgICAgICAgICAgcmV0dXJuIGlucHV0OwogICAgICAgIGludCBpbnB1dExlbmd0aCA9IGlucHV0Lmxlbmd0aCgpOwogICAgICAgIGludCByZXBsYWNlbWVudExlbmdodCA9IGJhZENoYXJzLmxlbmd0aDsKICAgICAgICBib29sZWFuIG1vZGlmaWVkID0gZmFsc2U7CiAgICAgICAgY2hhcltdIGJ1ZmZlciA9IG5ldyBjaGFyW2lucHV0TGVuZ3RoXTsKICAgICAgICBpbnB1dC5nZXRDaGFycygwLCBpbnB1dExlbmd0aCwgYnVmZmVyLCAwKTsKICAgICAgICBmb3IgKGludCBpbnB1dElkeCA9IDA7IGlucHV0SWR4IDwgaW5wdXRMZW5ndGg7IGlucHV0SWR4KyspIHsKICAgICAgICAgICAgY2hhciBjdXJyZW50ID0gYnVmZmVyW2lucHV0SWR4XTsKICAgICAgICAgICAgZm9yIChpbnQgcmVwbGFjZW1lbnRJZHggPSAwOyByZXBsYWNlbWVudElkeCA8IHJlcGxhY2VtZW50TGVuZ2h0OyByZXBsYWNlbWVudElkeCsrKSB7CiAgICAgICAgICAgICAgICBpZiAoY3VycmVudCA9PSBiYWRDaGFyc1tyZXBsYWNlbWVudElkeF0pIHsKICAgICAgICAgICAgICAgICAgICBidWZmZXJbaW5wdXRJZHhdID0gbmV3Q2hhcjsKICAgICAgICAgICAgICAgICAgICBtb2RpZmllZCA9IHRydWU7CiAgICAgICAgICAgICAgICAgICAgYnJlYWs7CiAgICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0KICAgICAgICB9CiAgICAgICAgcmV0dXJuIG1vZGlmaWVkID8gbmV3IFN0cmluZyhidWZmZXIpIDogaW5wdXQ7CiAgICB9CgogICAgcHVibGljIHN0YXRpYyBTdHJpbmcgcmVwbGFjZUFsbChTdHJpbmcgaW5wdXQsIFN0cmluZyByZXBsYWNlbWVudCwgUGF0dGVybiBwYXR0ZXJuKSB7CiAgICAgICAgcmV0dXJuIHBhdHRlcm4ubWF0Y2hlcihpbnB1dCkucmVwbGFjZUFsbChyZXBsYWNlbWVudCk7CiAgICB9CgogICAgcHJpdmF0ZSBzdGF0aWMgdm9pZCBwcmludGxuKCkgewogICAgICAgIFN5c3RlbS5vdXQucHJpbnRsbigpOwogICAgfQoKICAgIHByaXZhdGUgc3RhdGljIHZvaWQgcHJpbnRsbihTdHJpbmcgaW4pIHsKICAgICAgICBTeXN0ZW0ub3V0LnByaW50bG4oaW4pOwogICAgfQoKICAgIHByaXZhdGUgc3RhdGljIHZvaWQgcHJpbnRsbihTdHJpbmcgaW4sIE9iamVjdC4uLiBhcmdzKSB7CiAgICAgICAgU3lzdGVtLm91dC5wcmludGxuKFN0cmluZy5mb3JtYXQoaW4sIGFyZ3MpKTsKICAgIH0KCiAgICBwcml2YXRlIHN0YXRpYyBTdHJpbmcgZ2V0RmlsZU5hbWVXaXRob3V0RXh0ZW5zaW9uKFN0cmluZyBpbnB1dCkgewogICAgICAgIEZpbGUgZmlsZSA9IG5ldyBGaWxlKGlucHV0KTsKICAgICAgICBTdHJpbmcgbmFtZSA9IGZpbGUuZ2V0TmFtZSgpOwogICAgICAgIGlmIChuYW1lICE9IG51bGwpIHsKICAgICAgICAgICAgaW50IGRvdFBvcyA9IG5hbWUubGFzdEluZGV4T2YoJy4nKTsKICAgICAgICAgICAgaWYgKGRvdFBvcyA+
IDApIHsKICAgICAgICAgICAgICAgIG5hbWUgPSBuYW1lLnN1YnN0cmluZygwLCBkb3RQb3MpOwogICAgICAgICAgICB9CiAgICAgICAgfQogICAgICAgIHJldHVybiBuYW1lOwogICAgfQp9