import os
import soundfile
import librosa
import sklearn
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import read, write
from IPython.display import Audio
from numpy.fft import fft, ifft
%matplotlib inline

# Folder holding the kick-drum samples. os.fsencode yields a bytes path, so
# the paths derived from it with os.listdir / os.path.join are bytes as well.
directory = os.fsencode('Kick Drums')

# Feature table: one row per entry returned by extract_features (5 entries),
# one column per processed file. It starts with ZERO columns so that no
# all-zero placeholder column is mixed in with the real samples.
featureSet = np.empty((5, 0))

def extract_features(x, fs, file):
    """Summarise one audio clip as a fixed-length feature vector.

    Every feature is reduced to a single number so that clips of different
    durations yield vectors of identical length, which is what stacking them
    into a feature table requires.

    Parameters
    ----------
    x : array-like
        Audio samples of the clip, already decoded by the caller (the script
        below passes the output of ``librosa.load``).
    fs : number
        Sampling rate of ``x``; forwarded to librosa as ``sr``.
    file : str or bytes
        Path of the clip. Only its final path component is used, as a label.

    Returns
    -------
    numpy.ndarray
        One-dimensional object array with five entries, in this order:
        zero-crossing count, mean frame energy, mean spectral centroid,
        mean spectral bandwidth, file name.

    Raises
    ------
    ValueError
        If ``x`` contains no samples.
    """
    fileName = os.path.split(file)[1]

    # Use the samples handed in by the caller instead of decoding the file
    # from disk a second time.
    samples = np.asarray(x)
    if samples.size == 0:
        raise ValueError("no audio samples in {!r}".format(fileName))

    hop_length = 256
    frame_length = 512

    #zero crossings: number of sign changes counted over the whole clip
    zcr = librosa.zero_crossings(samples).sum()

    #energy: sum of squared magnitudes inside each overlapping frame
    energy = np.array([
        np.sum(np.abs(samples[start:start + frame_length]) ** 2)
        for start in range(0, len(samples), hop_length)
    ])

    #spectral things
    centroid = np.asarray(librosa.feature.spectral_centroid(y=samples, sr=fs))
    bandwidth = np.asarray(librosa.feature.spectral_bandwidth(y=samples, sr=fs))

    # dtype=object keeps the numeric summaries and the file name together in
    # one array without coercing the numbers to strings.
    return np.array(
        [zcr, energy.mean(), centroid.mean(), bandwidth.mean(), fileName],
        dtype=object,
    )
    
# `import sklearn` on its own does not guarantee that the `sklearn.cluster`
# submodule is loaded, so import it explicitly before KMeans is looked up.
import sklearn.cluster

# Numeric feature rows (one per clip) and the matching file names. They are
# kept apart from featureSet because KMeans needs a purely numeric matrix
# laid out as samples x features, whereas featureSet is features x samples
# and also carries the file names.
clipNames = []
clipFeatures = []

# Sorted so that the processing order (and therefore the label order) is the
# same on every run; os.listdir returns entries in arbitrary order.
for filename in sorted(os.listdir(directory)):

    #doing file things
    fileStr = os.path.join(directory, filename)
    if not os.path.isfile(fileStr):
        # Sub-directories cannot be read as audio.
        continue
    print(fileStr)
    try:
        data, samplerate = soundfile.read(fileStr)
    except RuntimeError as err:
        # soundfile reports files it cannot decode (e.g. stray non-audio
        # files in the folder) with RuntimeError subclasses; skip those
        # visibly rather than aborting the whole run.
        print("skipping {!r}: {}".format(fileStr, err))
        continue
    # NOTE: this rewrites the source file IN PLACE as 16-bit PCM before it is
    # loaded again by librosa.
    soundfile.write(fileStr, data, samplerate, subtype='PCM_16')
    x, fs = librosa.load(fileStr)
    features = extract_features(x, fs, fileStr)
    featureSet = np.column_stack((featureSet, features))

    # The last entry is the file name; every other entry is numeric. Entries
    # that are sequences rather than scalars are collapsed to their mean so
    # each clip contributes a row of plain floats.
    *values, clipName = features
    clipFeatures.append([float(np.mean(value)) for value in values])
    clipNames.append(clipName)

print(featureSet)

#clustering!
n_clusters = 2
if len(clipFeatures) < n_clusters:
    raise ValueError(
        "need at least {} readable audio files in {!r} to form {} clusters, "
        "found {}".format(n_clusters, directory, n_clusters, len(clipFeatures))
    )

# Rows are clips, columns are features: the layout fit_predict expects.
# NOTE(review): the features live on very different scales; consider
# standardising the columns before clustering.
featureMatrix = np.array(clipFeatures, dtype=float)
model = sklearn.cluster.KMeans(n_clusters=n_clusters)
labels = model.fit_predict(featureMatrix)
print("pre-labels")
#print(labels)
print("post-labels")
