#!/usr/bin/env perl
use warnings;
use strict;
use List::Util qw(shuffle);

# Print 100 random lowercase letters, each letter drawn with probability
# proportional to its weight in %freq. The output has no trailing newline.

# freq table extracted from Ubuntu's /usr/share/dict/words wordlist
my %freq = (
    a => 64439,
    b => 15526,
    c => 31872,
    d => 28531,
    e => 88833,
    f => 10675,
    g => 22712,
    h => 19320,
    i => 66986,
    j => 1948,
    k => 8409,
    l => 41107,
    m => 22508,
    n => 57144,
    o => 48944,
    p => 22274,
    q => 1524,
    r => 57347,
    s => 90113,
    t => 53006,
    u => 26118,
    v => 7989,
    w => 7530,
    x => 2124,
    y => 12652,
    z => 3281,
);

# Alphabetical order fixes which slice of [0, $sum) belongs to which letter.
my @letters = sort keys %freq;

# $running_sum{$letter} is the combined weight of every letter that sorts
# before $letter, i.e. the lower bound of that letter's slice. After the
# loop $sum holds the total weight of the table.
my $sum = 0;
my %running_sum;
for my $letter (@letters) {
    $running_sum{$letter} = $sum;
    $sum += $freq{$letter};
}

# random_letters($count)
#
# Returns a list of $count letters sampled according to %freq. $count is
# expected to be a non-negative integer; zero or a negative value yields an
# empty list.
#
# The letters come back in non-increasing alphabetical order (all the z's
# first, the a's last) because the underlying random numbers are produced in
# decreasing order. Shuffle the result if the order matters.
sub random_letters {
    my ($count) = @_;

    my $curmax    = 1;            # largest value still available, in (0,1]
    my $curletter = $#letters;    # index of the slice the last draw fell in
    my @result;

    for (my $i = $count; $i > 0; $i--) {
        # Multiplying by U**(1/$i), with U in (0,1], gives the next value of
        # a decreasing sequence of uniformly distributed random numbers
        # without generating and sorting them. 1-rand() is used instead of
        # rand() so that U is never 0.
        # see http://r...content-available-to-author-only...u.edu/cgi/viewcontent.cgi?article=3483&context=compsci
        $curmax *= (1 - rand()) ** (1 / $i);

        # scale the random number to the range of the cumulative weights
        my $num = int($curmax * $sum);

        # Find the slice that contains $num. Because $num never increases
        # between iterations, the search resumes where the previous one
        # stopped. It cannot run past index 0: the first letter's lower
        # bound is 0 and $num is never negative.
        $curletter-- while $num < $running_sum{ $letters[$curletter] };

        push @result, $letters[$curletter];
    }

    return @result;
}

my $count  = 100;    # the number of letters we want to generate
my @result = random_letters($count);

# @result is sorted (descending), so shuffle it before printing
print join('', shuffle(@result));