fork download
  1. import pandas as pd
  2. import numpy as np
  3.  
  4. high, size = 100, 20
  5. df = pd.DataFrame({'perception': np.random.randint(0, high, size),
  6. 'age': np.random.randint(0, high, size),
  7. 'smokes_cat': pd.Categorical(np.tile(['lots', 'little', 'not'], size//3+1)[:size]),
  8. 'outcome': np.random.randint(0, high, size),
  9. 'outlook_cat': pd.Categorical(np.tile(['positive', 'neutral', 'negative'], size//3+1)[:size])
  10. })
  11. df.insert(2, 'age_cat', pd.Categorical(pd.cut(df.age, range(0, high+5, size//2), right=False,
  12. labels=["{0} - {1}".format(i, i + 9) for i in range(0, high, size//2)])))
  13.  
  14. def tierify(i):
  15. if i <= 25:
  16. return 'lowest'
  17. elif i <= 50:
  18. return 'low'
  19. elif i <= 75:
  20. return 'med'
  21. return 'high'
  22.  
  23. df.insert(1, 'perception_cat', df['perception'].map(tierify))
  24. df.insert(6, 'outcome_cat', df['outcome'].map(tierify))
  25.  
  26. np.random.shuffle(df['smokes_cat'])
  27.  
  28. print('Columns:', ', '.join(df.columns))
  29. print(df)
  30.  
Success #stdin #stdout 0.58s 61472KB
stdin
Standard input is empty
stdout
Columns: perception, perception_cat, age, age_cat, smokes_cat, outcome, outcome_cat, outlook_cat
    perception perception_cat  age  ... outcome outcome_cat  outlook_cat
0            5         lowest   81  ...      20      lowest     positive
1            5         lowest   58  ...      69         med      neutral
2           46            low   77  ...      64         med     negative
3           10         lowest   71  ...      10      lowest     positive
4           11         lowest    6  ...      15      lowest      neutral
5           65            med   53  ...      68         med     negative
6           73            med   56  ...      91        high     positive
7           99           high   51  ...      33         low      neutral
8           65            med   48  ...      72         med     negative
9           46            low   93  ...      13      lowest     positive
10          47            low   28  ...      41         low      neutral
11          82           high   13  ...      56         med     negative
12          88           high   98  ...      47         low     positive
13           3         lowest   12  ...      99        high      neutral
14          41            low   42  ...      26         low     negative
15          61            med   21  ...      32         low     positive
16          36            low   58  ...      79        high      neutral
17          79           high   67  ...      66         med     negative
18          70            med   95  ...      43         low     positive
19          60            med   80  ...      78        high      neutral

[20 rows x 8 columns]