fork(1) download
  1. <?php
  2.  
  3. class RandomKeyMultiple {
  4. private $pool = array();
  5. private $max_range;
  6.  
  7. function __construct( $source ) {
  8. // build the look-up array
  9. foreach ( $source as $key => $value ) {
  10. for ( $i = 0; $i < $value; $i++ ) {
  11. $this->pool[] = $key;
  12. }
  13. }
  14. $this->max_range = count($this->pool) - 1;
  15. }
  16.  
  17. function get_random_key() {
  18. $x = mt_rand(0, $this->max_range);
  19.  
  20. return $this->pool[$x];
  21. }
  22. }
  23.  
  24. class RandomKey {
  25. private $steps = array();
  26. private $last_key;
  27. private $max_range;
  28.  
  29. function __construct( $source ) {
  30. // sort in ascending order to partially avoid numerical issues
  31. asort($source);
  32.  
  33. // calculate the partial sums. Considering OP's array:
  34. //
  35. // 1300 ----> 0
  36. // 30000 ----> 1300
  37. // 190000 ----> 31300
  38. // 265000 ----> 221300 endind with $partial = 486300
  39. //
  40. $partial = 0;
  41. $temp = 0;
  42. foreach ( $source as $k => &$v ) {
  43. $temp = $v;
  44. $v = $partial;
  45. $partial += $temp;
  46. }
  47.  
  48. // scale the steps to cover the entire mt_rand() range
  49. $factor = mt_getrandmax() / $partial;
  50. foreach ( $source as $k => &$v ) {
  51. $v *= $factor;
  52. }
  53.  
  54. // Having the most probably outcomes first, minimizes the look-up of
  55. // the correct index
  56. $this->steps = array_reverse($source);
  57.  
  58. // remove last element (don't needed during checks) but save the key
  59. end($this->steps);
  60. $this->last_key = key($this->steps);
  61. array_pop($this->steps);
  62. }
  63.  
  64. function get_random_key() {
  65. $x = mt_rand();
  66.  
  67. foreach ( $this->steps as $key => $value ) {
  68. if ( $x > $value ) {
  69. return $key;
  70. }
  71. }
  72. return $this->last_key;
  73. }
  74.  
  75. }
  76.  
  77. // *********** test section *************
  78.  
  79. function number_right( $num, $size ) {
  80. return str_pad(number_format($num), $size, ' ', STR_PAD_LEFT);
  81. }
  82.  
  83. function percentage_right( $num, $decimals, $size ) {
  84. return str_pad(number_format(100 * $num, 3), $size, ' ', STR_PAD_LEFT);
  85. }
  86.  
  87.  
  88. function test_randomness( $test, $original, $iterations ) {
  89.  
  90. $sum = 0;
  91. foreach ( $original as $k => $v ) {
  92. $buckets[$k] = 0;
  93. $sum += $v;
  94. }
  95.  
  96. $start = microtime(true);
  97. for ( $i = 0; $i < $iterations; $i++ ) {
  98. $buckets[$test->get_random_key()]++;
  99. }
  100. $stop = microtime(true);
  101. $duration = $stop - $start;
  102. arsort($buckets);
  103. echo "Distribution of ", number_format($iterations),
  104. " samples (generated in ", number_format($duration,5), " seconds):\n",
  105. "======================================================\n",
  106. " key occurences (%) (expected %) \n",
  107. "------------------------------------------------------\n";
  108. foreach ( $buckets as $i => $num ) {
  109. echo " " , str_pad($i . ": ", 16),
  110. number_right($num, 10),
  111. percentage_right($num / $iterations, 3, 11),
  112. percentage_right($original[$i] / $sum, 3, 10), "\n";
  113. }
  114. echo "------------------------------------------------------\n";
  115. }
  116.  
  117. function test_steps( $arr, $name, $rep ) {
  118. echo "\nTesting steps method with $name array.\n";
  119. $test = new RandomKey($arr);
  120. test_randomness($test, $arr, $rep);
  121. }
  122.  
  123. function test_reps( $arr, $name, $rep ) {
  124. echo "\nTesting repetitions method with $name array.\n";
  125. $test = new RandomKeyMultiple($arr);
  126. test_randomness($test, $arr, $rep);
  127. }
  128.  
  129. $repetitions = 500000;
  130.  
  131. $coin = array(
  132. 'head' => 1,
  133. 'tails' => 1
  134. );
  135. test_reps($coin, "coins", $repetitions);
  136.  
  137. $dice = array( // results of throwing two dices
  138. '2' => 1, // 1 + 1
  139. '3' => 2, // 1 + 2 or 2 + 1
  140. '4' => 3, // 1 + 3 or 2 + 2 or 3 + 1
  141. '5' => 4, // 1 + 4 or 2 + 3 or 3 + 2 or 4 + 1
  142. '6' => 5, // 1 + 5 or 2 + 4 or 3 + 3 or 4 + 2 or 5 + 1
  143. '7' => 6, // ...
  144. '8' => 5,
  145. '9' => 4,
  146. '10' => 3,
  147. '11' => 2,
  148. '12' => 1
  149. );
  150. test_reps($dice, "dice", $repetitions);
  151.  
  152. $arr2 = array(
  153. '0' => 265000, // Area
  154. '1' => 190000,
  155. '2' => 30000,
  156. '3' => 1300
  157. );
  158. test_reps($arr2, "OP's", $repetitions);
  159.  
  160. $arr = array(
  161. '0' => 265000, // Area
  162. '1' => 190000,
  163. '2' => 30000,
  164. '3' => 1300
  165. );
  166. test_steps($arr, "OP's", $repetitions);
  167.  
  168. $population = array(
  169. "Washington" => 658.893, // thousands
  170. "New York" => 8406,
  171. "Chicago" => 2719,
  172. "Pasadena" => 139.731,
  173. "Seattle" => 652.405
  174. );
  175. test_steps($population, "population", $repetitions);
  176.  
  177. ?>
Success #stdin #stdout 2.41s 52472KB
stdin
Standard input is empty
stdout
Testing repetitions method with coins array.
Distribution of 500,000 samples (generated in 0.41166 seconds):
======================================================
    key              occurences     (%)   (expected %) 
------------------------------------------------------
    head:              250,092     50.018    50.000
    tails:             249,908     49.982    50.000
------------------------------------------------------

Testing repetitions method with dice array.
Distribution of 500,000 samples (generated in 0.40726 seconds):
======================================================
    key              occurences     (%)   (expected %) 
------------------------------------------------------
    7:                  83,388     16.678    16.667
    6:                  69,777     13.955    13.889
    8:                  69,188     13.838    13.889
    5:                  55,736     11.147    11.111
    9:                  55,686     11.137    11.111
    10:                 41,749      8.350     8.333
    4:                  41,337      8.267     8.333
    3:                  27,904      5.581     5.556
    11:                 27,794      5.559     5.556
    12:                 13,763      2.753     2.778
    2:                  13,678      2.736     2.778
------------------------------------------------------

Testing repetitions method with OP's array.
Distribution of 500,000 samples (generated in 0.47542 seconds):
======================================================
    key              occurences     (%)   (expected %) 
------------------------------------------------------
    0:                 271,885     54.377    54.493
    1:                 195,889     39.178    39.071
    2:                  30,893      6.179     6.169
    3:                   1,333      0.267     0.267
------------------------------------------------------

Testing steps method with OP's array.
Distribution of 500,000 samples (generated in 0.43914 seconds):
======================================================
    key              occurences     (%)   (expected %) 
------------------------------------------------------
    0:                 272,714     54.543    54.493
    1:                 195,146     39.029    39.071
    2:                  30,784      6.157     6.169
    3:                   1,356      0.271     0.267
------------------------------------------------------

Testing steps method with population array.
Distribution of 500,000 samples (generated in 0.45717 seconds):
======================================================
    key              occurences     (%)   (expected %) 
------------------------------------------------------
    New York:          334,762     66.952    66.841
    Chicago:           107,955     21.591    21.620
    Washington:         25,891      5.178     5.239
    Seattle:            25,885      5.177     5.188
    Pasadena:            5,507      1.101     1.111
------------------------------------------------------