fork download
  # Definition of price.
  #
  # Models the price of each wine from its rating and its age. A wine has a
  # peak age of `rating - 50` and a base price of `rating / 2`:
  #   * age <= peak age: base * 5 * (age + 1) / peak age
  #   * age >  peak age: base * (5 - (age - peak age) / 2)
  # Negative results are clamped to 0.
  #
  # Args:
  #   rating: numeric vector of ratings.
  #   age:    numeric vector of ages, at least as long as `rating`; elements
  #           beyond length(rating) are ignored.
  #
  # Returns:
  #   Numeric vector of prices with one element per rating (numeric(0) for
  #   empty input).
  price <- function(rating, age) {
    stopifnot(length(age) >= length(rating))
    # Only the first length(rating) ages take part, so no silent recycling.
    age <- age[seq_along(rating)]

    peakAge <- rating - 50
    basePrice <- rating / 2

    # Start with the "not yet past its peak" formula for every wine, then
    # overwrite the wines that are past their peak.
    value <- basePrice * 5 * (age + 1) / peakAge
    pastPeak <- age > peakAge
    value[pastPeak] <- (basePrice * (5 - (age - peakAge) / 2))[pastPeak]

    # A price can never be negative.
    value[value < 0] <- 0
    value
  }
  15.  
  # Create a price dataset.
  #
  # Draws `n` random wines: ratings uniform on [50, 100], ages uniform on
  # [0, 50]. The price comes from price(rating, age), multiplied by an
  # independent noise factor per wine, uniform on [0.9, 1.1].
  #
  # Args:
  #   n: number of wines to generate.
  #
  # Returns:
  #   Numeric matrix with one row per wine and the columns "rating", "age"
  #   and "price".
  wineSet1 <- function(n=300) {
    rating <- runif(n) * 50 + 50
    age <- runif(n) * 50
    # One factor per wine: a single shared factor would only rescale the
    # whole data set instead of adding noise to it.
    noise <- runif(n) * 0.2 + 0.9
    cbind(rating = rating, age = age, price = price(rating, age) * noise)
  }
  25.  
  # Euclidean distance from a query point to every row of a data set.
  #
  # Only the first two columns of `data` are used as coordinates.
  #
  # Args:
  #   data: matrix (or data frame) with the coordinates in columns 1 and 2.
  #   v1:   the query point, two numeric values (vector or list).
  #
  # Returns:
  #   Two-column matrix sorted by increasing distance: "distances" holds the
  #   distance, "index" the row number of that observation in `data`.
  getDistances <- function(data, v1) {
    target <- unlist(v1)
    # drop = FALSE keeps a single-row data set a matrix.
    features <- data[, c(1, 2), drop = FALSE]
    # t(features) has one column per observation, so `target` is recycled
    # down each column and subtracted coordinate by coordinate.
    distances <- sqrt(colSums((t(features) - target) ^ 2))
    distanceFrame <- cbind(distances = distances, index = seq_len(nrow(data)))
    distanceFrame[order(distances), , drop = FALSE]
  }
  33.  
  # KNN estimate.
  #
  # Predicts the value in column 3 for a query point as the plain mean over
  # its k nearest neighbours, using Euclidean distance on columns 1 and 2.
  #
  # Args:
  #   data: matrix (or data frame) with coordinates in columns 1 and 2 and
  #         the value to predict in column 3.
  #   v1:   the query point, two numeric values (vector or list).
  #   k:    number of neighbours to average; when `data` has fewer than k
  #         rows, all rows are used.
  #
  # Returns:
  #   The mean of column 3 over the selected neighbours.
  knnEstimate <- function(data, v1, k=3) {
    v <- unlist(v1)
    # drop = FALSE keeps a single-row data set a matrix, so t() below still
    # yields one column per observation.
    features <- data[, 1:2, drop = FALSE]
    nearest <- order(sqrt(colSums((t(features) - v) ^ 2)))
    # Clamp k: indexing past the end would add NA rows and make the mean NA.
    idx <- nearest[seq_len(min(k, length(nearest)))]
    mean(data[idx, 3])
  }
  # Divide data set into training set and test set.
  #
  # Every row is assigned to the test set independently with probability
  # `test`; all remaining rows form the training set.
  #
  # Args:
  #   data: matrix (or data frame) with one observation per row.
  #   test: probability for a row to end up in the test set.
  #
  # Returns:
  #   list(test = ..., train = ...), both with the same columns as `data`.
  divideData <- function(data, test = 0.1) {
    # A logical mask instead of which() plus negative indices: when no row
    # is selected, data[-integer(0), ] would return zero rows rather than
    # the complete data set.
    inTest <- runif(nrow(data)) < test
    # drop = FALSE keeps one-row subsets two-dimensional.
    list(test = data[inTest, , drop = FALSE],
         train = data[!inTest, , drop = FALSE])
  }
  45.  
  # Mean squared prediction error of an estimator on a held-out test set.
  #
  # Args:
  #   algF:     estimator, called as algF(trainSet, point) where `point`
  #             holds the first two entries of a test row.
  #   trainSet: data handed to `algF` unchanged.
  #   testSet:  matrix whose rows hold the two inputs followed by the true
  #             value in column 3.
  #
  # Returns:
  #   The mean of the squared differences between true and estimated value.
  testAlgorithm <- function(algF, trainSet, testSet) {
    # Signed error (true value minus estimate) for one test observation.
    residualFor <- function(observation) {
      estimate <- algF(trainSet, observation[1:2])
      observation[3] - estimate
    }
    residuals <- apply(testSet, 1, residualFor)
    mean(residuals ^ 2)
  }
  53.  
  # Cross-validate an estimator by repeated random train/test splits.
  #
  # Each trial splits `data` with divideData(data, test) and scores `algF`
  # on that split with testAlgorithm().
  #
  # Args:
  #   algF:   estimator, passed through to testAlgorithm().
  #   data:   the complete data set.
  #   trials: number of random splits to evaluate.
  #   test:   test-set probability passed to divideData().
  #
  # Returns:
  #   The average of the per-trial errors (NaN when trials is 0).
  crossValidate <- function(algF, data, trials=100, test=0.1) {
    # seq_len() runs zero trials for trials = 0, where 1:trials would run two;
    # vapply() guarantees a numeric vector for any number of trials.
    errorVector <- vapply(seq_len(trials), function(i) {
      dataSets <- divideData(data, test)
      testAlgorithm(algF, dataSets[["train"]], dataSets[["test"]])
    }, numeric(1))
    sum(errorVector) / trials
  }
  63.  
  # Generate a random wine data set and print the cross-validated error of
  # the k-nearest-neighbour estimator on it.
  # NOTE(review): this call used to pass `weightedKNN`, which is not defined
  # in this file; knnEstimate is the estimator that is defined here. Confirm
  # that it is the intended one.
  data <- wineSet1()
  print(crossValidate(knnEstimate, data))
  66.  
Success #stdin #stdout 0.31s 22832KB
stdin
Standard input is empty
stdout
Standard output is empty