fork(24) download
  1. # your code goes here
  2. # your code goes here
  3. import numpy as np
  4. # consider you have an array like this
  5. a= [1, 2, 3, 5, 10,11, 12, 13, 14, 15, 21,23, 25, 26, 27, 29, 30,31, 35, 51]
  6. # here we have decided to group all these numbers into 5 bins
  7. # i.e bins = 5
  8. # the minimum number in the array is 1
  9. # the maximum number in the array is 51
  10. # the width of each bin is calculated as = ((max - min) / bins)
  11. # width of each bin = (51-1)/5 = 10
  12. # Since we got each bin with as 10, we can choose the bin edges like this
  13. # 1 ...... 11 ....... 21 ........ 31 ....... 41 ....... 51
  14. # |---10---|----10----|----10-----|----10----|----10----|
  15. # so we have found out the bin edges now
  16. # to find thte counts we calcuate how many number of points fall into each bin
  17. # therefore the count of a bin = number of elements of a such that left_bin_egde<=ai<right_bin_edges
  18. # i. number of elements belongs to the 1st bin 1<=x<11 => 5 [1,2,3,5,10]
  19. # ii. number of elements belongs to the 2nd bin 11<=x<21 => 5 [11,12,13,14,15]
  20. # iii. number of elements belongs to the 3rd bin 21<=x<31 => 7 [21,23,25,26,27,29,30]
  21. # iii. number of elements belongs to the 3rd bin 21<=x<31 => 7 [21,23,25,26,27,29,30]
  22. # iv. number of elements belongs to the 4th bin 31<=x<41 => 2 [31,35]
  23. # v. number of elements belongs to the 5th bin 41<=x<=51 => 1 [51]
  24.  
  25. # note: from the documentation: https://d...content-available-to-author-only...y.org/doc/numpy/reference/generated/numpy.histogram.html
  26. # All but the last (righthand-most) bin is half-open i.e [1,2,3,4], the bins are [1,2), [2,3), [3,4]
  27. print('='*30, "explaining 'bin edges and counts",'='*30)
  28. counts,bins = np.histogram(a, bins=5)
  29.  
  30. print("bin edges :",bins)
  31. print("counts per each bin :",counts)
  32.  
  33. # density : bool, optional
  34. # If False, the result will contain the number of samples in each bin.
  35. # If True, the result is the value of the probability density function at the bin, normalized such that the integral over the range is 1.
  36. # Note that the sum of the histogram values will not be equal to 1 unless bins of unity width are chosen;
  37. # it is not a probability mass function.
  38.  
  39. # and from the source code
  40. #if density:
  41. # db = np.array(np.diff(bin_edges), float)
  42. # return n/db/n.sum(), bin_edges
  43.  
  44. # here the n => number of elements for each bin
  45. n = counts
  46. # and db = difference between bin edges
  47. db = np.array(np.diff(bins))
  48. # n.sum() number of all the elemnts
  49.  
  50.  
  51. print('='*30, "explaining 'density=True' parameter",'='*30)
  52. print("manual calculated densities for each bin",counts/db/counts.sum())
  53.  
  54. counts, bins = np.histogram(a, bins=5, density=True)
  55.  
  56. print("bin edges :",bins)
  57. print("counts per each bin using density=True:",counts)
  58.  
  59. print('='*30, "explaining counts/sum(counts)",'='*30)
  60. # pleasen note that the documentation says when you have density=True,
  61. # "that the sum of the histogram values will not be equal to 1"
  62.  
  63. # this is simple logic we used, to make the whole sum=1, we have devided each element by the number of whole elemnets
  64.  
  65. counts, bins = np.histogram(a, bins=5, density=True)
  66.  
  67. print("bin edges :",bins)
  68. print("counts per each bin using density=True:",counts/sum(counts))
Success #stdin #stdout 0.13s 92224KB
stdin
Standard input is empty
stdout
============================== explaining 'bin edges and counts ==============================
bin edges : [  1.  11.  21.  31.  41.  51.]
counts per each bin : [5 5 7 2 1]
============================== explaining 'density=True' parameter ==============================
manual calculated densities for each bin [ 0.025  0.025  0.035  0.01   0.005]
bin edges : [  1.  11.  21.  31.  41.  51.]
counts per each bin using density=True: [ 0.025  0.025  0.035  0.01   0.005]
============================== explaining counts/sum(counts) ==============================
bin edges : [  1.  11.  21.  31.  41.  51.]
counts per each bin using density=True: [ 0.25  0.25  0.35  0.1   0.05]