<?php
/**
 * Dictionary-compress the file 'i' into the file 'o'.
 *
 * Output layout: the dictionary words joined by NUL bytes, then two NUL
 * bytes, then the data in which every dictionary word has been replaced by
 * its decimal index in the dictionary.
 */

/**
 * Build the compressed representation of $data.
 *
 * @param string $data Raw input bytes.
 *
 * @return string Dictionary, two-NUL separator and substituted data.
 *
 * @throws RuntimeException If PCRE fails while scanning or rewriting $data.
 */
function dictionaryCompress($data)
{
    $null = chr(0);

    // Find all the "words" from the data
    if (preg_match_all('|\b(\w+)\b|', $data, $matches) === false) {
        throw new RuntimeException('Unable to scan the input for words');
    }

    // Count how many times each word was seen. PHP stores keys such as "42"
    // as integers, so each key is cast back to a string before it is used.
    $counts = array_count_values($matches[0]);

    // For first word we'll substitute it with '0', so start there.
    $num = 0;

    // Prepare our dictionary (word => substitution index, in index order)
    $dict = array();

    // For each of the words...
    foreach ($counts as $word => $seen) {
        $word = (string) $word;

        // Set aside the length of the word we're substituting
        $len = strlen($word);

        // Total bytes the word currently uses in the data
        $size = $len * $seen;

        // If the (dictionary entry + substitution size * occurrences)
        // is less than the number of bytes the word currently takes, or
        // if the "word" consists only of digits, substitute it. Digit-only
        // words are always substituted because, left in place, they could
        // not be told apart from dictionary indexes. ctype_digit() also
        // covers words like "007" that PHP keeps as string keys.
        if ($len + $seen * strlen((string) $num) < $size || ctype_digit($word)) {
            // Add the word to the dictionary (its index is its substitution)
            $dict[$word] = $num;

            // The next substitution gets the subsequent number
            $num++;
        }
    }

    // Replace every dictionary word in ONE pass. Replacing word by word
    // would let a later digit-only word overwrite indexes that were already
    // inserted for earlier words.
    $data = preg_replace_callback(
        '|\b\w+\b|',
        function ($match) use ($dict) {
            $word = $match[0];

            return isset($dict[$word]) ? (string) $dict[$word] : $word;
        },
        $data
    );
    if ($data === null) {
        throw new RuntimeException('Unable to substitute words in the input');
    }

    // Glue the dictionary data together with nulls, then glue the
    // compressed data on the end of the dictionary with two nulls
    return implode($null, array_keys($dict)) . $null . $null . $data;
}

$data = file_get_contents('i');
if ($data === false) {
    throw new RuntimeException("Unable to read input file 'i'");
}

$comp = dictionaryCompress($data);

// Write the whole thing to file
if (file_put_contents('o', $comp) === false) {
    throw new RuntimeException("Unable to write output file 'o'");
}
// The closing tag is kept on purpose: non-PHP data follows it in this file.
?>
PD9waHAKCiRudWxsID0gY2hyKDApOwokZGF0YSA9IGZpbGVfZ2V0X2NvbnRlbnRzKCdpJyk7CgovLyBGaW5kIGFsbCB0aGUgIndvcmRzIiBmcm9tIHRoZSBkYXRhCnByZWdfbWF0Y2hfYWxsKCd8XGIoXHcrKVxifCcsICRkYXRhLCAkbWF0Y2hlcyk7CgovLyBDYWxjdWxhdGUgdGhlIHRvdGFsIGJ5dGVzIHRoZSB3b3JkIHVzZXMgaW4gdGhlIGRhdGEKJGZyZXEgPSBhcnJheSgpCmZvcmVhY2ggKCRtYXRjaGVzWzBdIGFzICR3b3JkKSB7CiAgICAKICAgIGlmICghaXNzZXQoJGZyZXFbJHdvcmRdKSkgeyAvLyBZZXQgdW51c2VkLCBpbml0aWFsaXplIGl0CiAgICAgICAgJGZyZXFbJHdvcmRdICA9IHN0cmxlbigkd29yZCk7CiAgICAKICAgIH0gZWxzZSB7ICAgICAgICAgICAgICAgICAgICAvLyBTZWVuIGJlZm9yZSwganVzdCBhZGQgdG8gaXQKICAgICAgICAkZnJlcVskd29yZF0gKz0gc3RybGVuKCR3b3JkKTsKICAgIH0KfQoKCi8vIEZvciBmaXJzdCB3b3JkIHdlJ2xsIHN1YnN0aXR1dGUgaXQgd2l0aCAnMCcsIHNvIHN0YXJ0IHRoZXJlLgokbnVtID0gMDsKCi8vIFByZXBhcmUgb3VyIGRpY3Rpb25hcnkKJGRpY3QgPSBhcnJheSgpOwoKLy8gRm9yIGVhY2ggb2YgdGhlIHdvcmRzLi4uCmZvcmVhY2ggKCRmcmVxIGFzICR3b3JkID0+ICRzaXplKSB7CiAgICAvLyBTZXQgYXNpZGUgdGhlIGxlbmd0aCBvZiB0aGUgd29yZCB3ZSdyZSBzdWJzdGl0dXRpbmcKICAgICRsZW4gPSBzdHJsZW4oJHdvcmQpOwogICAgCiAgICAvLyBDYWxjdWxhdGUgaG93IG1hbnkgdGltZXMgdGhlIHdvcmQgd2FzIHNlZW4KICAgICRzZWVuID0gJHNpemUgLyAkbGVuOwogICAgCiAgICAvLyBJZiB0aGUgKGRpY3Rpb25hcnkgZW50cnkgKyBzdWJzdGl0dXRpb24gc2l6ZSAqIG9jY3VyYW5jZXMpCiAgICAvLyAgIGlzIGxlc3MgdGhhbiB0aGUgbnVtYmVyIG9mIGJ5dGVzIHRoZSB3b3JkIGN1cnJlbnRseSB0YWtlcyBvcgogICAgLy8gICBpZiB0aGUgIndvcmQiIGhhcHBlbnMgdG8gYmUgYSBudW1iZXIuLi4KICAgIGlmICgkbGVuICsgJHNlZW4gKiBzdHJsZW4oJG51bSkgPCAkc2l6ZSB8fCBpc19pbnQoJHdvcmQpKSB7CiAgICAKICAgICAgICAvLyBUaGVuIHJlcGxhY2UgaXQgd2l0aCBpdHMgcmVwcmVzZW50YXRpb24gaW4gdGhlIGRhdGEKICAgICAgICAkZGF0YSA9IHByZWdfcmVwbGFjZSgnfFxiJy4gcHJlZ19xdW90ZSgkd29yZCkgLidcYnwnLCAkbnVtLCAkZGF0YSk7CiAgICAgICAgCiAgICAgICAgLy8gQWRkIHRoZSB3b3JkIHRvIHRoZSBkaWN0aW9uYXJ5IChpdCdzIGluZGV4IGlzIGl0J3Mgc3Vic3RpdHV0aW9uKQogICAgICAgICRkaWN0W10gPSAkd29yZDsKICAgICAgICAKICAgICAgICAvLyBUaGUgbmV4dCBzdWJzdGl0dXRpb24gZ2V0cyB0aGUgc3Vic2VxdWVudCBudW1iZXIKICAgICAgICAkbnVtKys7CiAgICB9Cn0KCi8vIEdsdWUgdGhlIGRpY3Rpb25hcnkgZGF0YSB0b2dldGhlciB3aXRoIG51bGxzCiRkaWN0ID0gaW1wbG9kZSgkbnVsbCwgJGRpY3QpOwoK
Ly8gR2x1ZSB0aGUgY29tcHJlc3NlZCBkYXRhIG9uIHRoZSBlbmQgb2YgdGhlIGRpY3Rpb25hcnkgd2l0aCB0d28gbnVsbHMKJGNvbXAgPSAkZGljdCAuICRudWxsIC4gJG51bGwgLiAkZGF0YTsKCi8vIFdyaXRlIHRoZSB3aG9sZSB0aGluZyB0byBmaWxlCmZpbGVfcHV0X2NvbnRlbnRzKCdvJywgJGNvbXApOwoKPz4=