import statsmodels.formula.api as smf
import seaborn as sns
import matplotlib.pyplot as plt
from pandas import DataFrame
import tkinter as tk
import tkinter.filedialog as fd
import csv
import pandas as pd
import collections
import numpy as np
def outlier_iqr(df, chk_column, output_column):
    """Flag the rows of ``df`` whose ``chk_column`` value is an IQR outlier.

    A value counts as an outlier when it is strictly below
    ``Q1 - 1.5 * IQR`` or strictly above ``Q3 + 1.5 * IQR``, where Q1 and Q3
    are the 25th and 75th percentiles of ``chk_column`` and
    ``IQR = Q3 - Q1``. The quartiles, the IQR and both fences are printed.

    Args:
        df: DataFrame to inspect. It is modified in place.
        chk_column: Label of the numeric column to check.
        output_column: Label of the column that receives the flags
            (1 = outlier, 0 = not an outlier). It is created when missing
            and overwritten otherwise.

    Returns:
        The same ``df`` object, with ``output_column`` filled in.
    """
    # Plain label selection; the ``.ix`` indexer used previously no longer
    # exists in current pandas.
    col = df[chk_column]
    print(col)

    # Quartiles. Series.quantile interpolates linearly and skips NaN, so a
    # missing value does not turn both fences into NaN.
    q25, q75 = col.quantile([0.25, 0.75])
    iqr = q75 - q25  # interquartile range
    print("25パーセント点", q25)
    print("75パーセント点", q75)
    print("四分位範囲", iqr)

    # Fences beyond which a value is treated as an outlier.
    outlier_min = q25 - iqr * 1.5
    outlier_max = q75 + iqr * 1.5
    print('aa', outlier_min)
    print(outlier_max)
    print(df[chk_column])

    # Assign the whole flag column at once. Writing through
    # ``df.iloc[i][output_column]`` only changes a temporary copy of the row,
    # so the flags never reached ``df``. Comparisons with NaN are False, so
    # missing values are flagged 0.
    is_outlier = (col > outlier_max) | (col < outlier_min)
    df[output_column] = is_outlier.astype(int)
    return df
def input_data():
    """Ask the user to pick a file in a dialog and load it as CSV.

    Returns:
        The DataFrame that ``pandas.read_csv`` builds from the chosen file.

    Raises:
        FileNotFoundError: If the dialog is closed without choosing a file.
    """
    # A Tk root must exist for the dialog; withdraw it so only the dialog
    # itself is shown.
    root = tk.Tk()
    root.withdraw()
    try:
        # NOTE(review): the dialog also offers "py" and "html" entries, yet
        # the selection is always parsed as CSV below - confirm the intended
        # filter list.
        file = fd.askopenfilename(
            title="ファイルを選んでください",
            filetypes=[("TEXT", "csv"), ("TEXT", "py"), ("HTML", "html")],
        )
    finally:
        # Release the hidden root window once the dialog has returned.
        root.destroy()

    # A cancelled dialog yields an empty result; fail with a clear message
    # instead of handing an empty path to read_csv.
    if not file:
        raise FileNotFoundError("no file was selected in the dialog")

    return pd.read_csv(file)
# Script entry: load the user-selected CSV, flag the outliers of its
# 'latency_msec' column in 'outlier_index', and print the resulting frame.
df = outlier_iqr(input_data(), 'latency_msec', 'outlier_index')
print(df)
aW1wb3J0IHN0YXRzbW9kZWxzLmZvcm11bGEuYXBpIGFzIHNtZgppbXBvcnQgc2VhYm9ybiBhcyBzbnMKaW1wb3J0IG1hdHBsb3RsaWIucHlwbG90IGFzIHBsdApmcm9tIHBhbmRhcyBpbXBvcnQgRGF0YUZyYW1lCmltcG9ydCB0a2ludGVyIGFzIHRrCmltcG9ydCB0a2ludGVyLmZpbGVkaWFsb2cgYXMgZmQKaW1wb3J0IGNzdgppbXBvcnQgcGFuZGFzIGFzIHBkCmltcG9ydCBjb2xsZWN0aW9ucwppbXBvcnQgbnVtcHkgYXMgbnAKCgpkZWYgb3V0bGllcl9pcXIoZGYsIGNoa19jb2x1bW4sIG91dHB1dF9jb2x1bW4pOgoKICAgICMg5YiX44KS5oq95Ye644GZ44KLCiAgICBjb2wgPSBkZi5peFs6LCBjaGtfY29sdW1uXQogICAgcHJpbnQoY29sKQogICAgIyDlm5vliIbkvY3mlbAKICAgICMgcTEgPSBjb2wuZGVzY3JpYmUoKVsnMjUlJ10KICAgICMgcTMgPSBjb2wuZGVzY3JpYmUoKVsnNzUlJ10KICAgIHE3NSwgcTI1ID0gbnAucGVyY2VudGlsZShjb2wsIFs3NSwgMjVdKQoKICAgIGlxciA9IHE3NSAtIHEyNSAgIyDlm5vliIbkvY3nr4Tlm7IKICAgIHByaW50KCIyNeODkeODvOOCu+ODs+ODiOeCuSIsIHEyNSkKICAgIHByaW50KCI3NeODkeODvOOCu+ODs+ODiOeCuSIsIHE3NSkKICAgIHByaW50KCLlm5vliIbkvY3nr4Tlm7IiLCBpcXIpCgogICAgIyDlpJbjgozlgKTjga7ln7rmupbngrkKICAgIG91dGxpZXJfbWluID0gcTI1IC0gKGlxcikgKiAxLjUKICAgIG91dGxpZXJfbWF4ID0gcTc1ICsgKGlxcikgKiAxLjUKCiAgICBwcmludCgnYWEnLCBvdXRsaWVyX21pbikKICAgIHByaW50KG91dGxpZXJfbWF4KQoKICAgIHByaW50KGRmW2Noa19jb2x1bW5dKQoKICAgICMg56+E5Zuy44GL44KJ5aSW44KM44Gm44GE44KL5YCk44KS6Zmk44GPCiAgICBmb3IgaSwgbGF0ZW5jeSBpbiBlbnVtZXJhdGUoZGZbY2hrX2NvbHVtbl0pOgogICAgICAgIGlmIGxhdGVuY3kgPiBvdXRsaWVyX21heCBvciBsYXRlbmN5IDwgb3V0bGllcl9taW46CiAgICAgICAgICAgIGRmLmlsb2NbaV1bb3V0cHV0X2NvbHVtbl0gPSAxCiAgICAgICAgICAgIHByaW50KCcxJykKICAgICAgICAgICAgcHJpbnQoZGYuaWxvY1tpXVtvdXRwdXRfY29sdW1uXSkKICAgICAgICBlbHNlOgogICAgICAgICAgICBkZi5pbG9jW2ldW291dHB1dF9jb2x1bW5dID0gMAogICAgICAgICAgICBwcmludCgnMCcpCiAgICAgICAgICAgIHByaW50KGRmLmlsb2NbaV1bb3V0cHV0X2NvbHVtbl0pCiAgICByZXR1cm4gZGYKCgpkZWYgaW5wdXRfZGF0YSgpOgogICAgcm9vdCA9IHRrLlRrKCkKICAgIHJvb3Qud2l0aGRyYXcoKQogICAgZmlsZSA9IGZkLmFza29wZW5maWxlbmFtZSgKICAgICAgICB0aXRsZT0i44OV44Kh44Kk44Or44KS6YG444KT44Gn44GP44Gg44GV44GEIiwKICAgICAgICBmaWxldHlwZXM9WygiVEVYVCIsICJjc3YiKSwgKCJURVhUIiwgInB5IiksICgiSFRNTCIsICJodG1sIildCiAgICApCgogICAgZGYgPSBwZC5yZWFkX2NzdihmaWxlKQoKICAgIHJldHVybiBkZgoKCmRmID0g
aW5wdXRfZGF0YSgpCmRmID0gb3V0bGllcl9pcXIoZGYsICdsYXRlbmN5X21zZWMnLCAnb3V0bGllcl9pbmRleCcpCnByaW50KGRmKQ==