fork download
  1. # Definition of price.
  2. price <- function(rating, age) {
  3. peakAge <- rating- 50
  4. price <- rating / 2
  5. price <- sapply(1:length(rating), function(i){
  6. if (age[i] > peakAge[i]) {
  7. p <- price[i] * (5 - (age[i] - peakAge[i]))
  8. } else {
  9. p <- price[i] * 5 * (age[i] + 1) / peakAge[i]
  10. }
  11. return(ifelse(p<0, 0, p))
  12. })
  13. return(price)
  14. }
  15.  
  16. # Create a price dataset.
  17. createSet <- function(n=300) {
  18. randVector <- runif(n * 2)
  19. rating <- randVector[1:n] * 50 + 50
  20. age <- randVector[(n+1):(n*2)] * 50
  21. price <- price(rating, age) * (runif(1) * 0.2 + 0.9)
  22. data <- cbind(rating, age, price)
  23. return(data)
  24. }
  25.  
  26. getDistances <- function(data, v1) {
  27. distances <- apply(data[,c(1,2)], 1, function(v2) sqrt(sum((v2 - v1) ^ 2)))
  28. distanceFrame <- data.frame(distance=distances, index=1:nrow(data))
  29. ord=order(distanceFrame[,'distance'])
  30. return(distanceFrame[ord,])
  31. }
  32.  
  33. # KNN estimate.
  34. knnEstimate <- function(data, v1, k=3) {
  35. distances <- getDistances(data[,c(1,2)], v1)[1:k,]
  36. ret <- mean(data[distances[,'index'], 'price'])
  37. return(ret)
  38. }
  39.  
  40. # Divide data set into training set and test set.
  41. divideData <- function(data, test = 0.1) {
  42. testIndices <- which(runif(nrow(data)) < test)
  43. return(list(test=data[testIndices,], train=data[-testIndices,]))
  44. }
  45.  
  46. testAlgorithm <- function(algF, trainSet, testSet) {
  47. errorVector <- apply(testSet, 1, function(i) {
  48. return(testSet[3] - algF(trainSet, testSet[c(1,2)]))
  49. } )
  50. return(mean(errorVector ^ 2))
  51. }
  52.  
  53. crossValidate <- function(algF, data, trials=100, test=0.1) {
  54. errorVector <- sapply(1:trials, function(i) {
  55. print(paste('Trail', i))
  56. dataSets <- divideData(data, test)
  57. return(testAlgorithm(algF, dataSets[['train']], dataSets[['test']]))
  58. })
  59. return(sum(errorVector) / trials)
  60. }
  61.  
  62. data <- createSet()
  63. print(crossValidate(knnEstimate, data))
  64.  
Success #stdin #stdout 13.74s 23896KB
stdin
Standard input is empty
stdout
[1] "Trail 1"
[1] "Trail 2"
[1] "Trail 3"
[1] "Trail 4"
[1] "Trail 5"
[1] "Trail 6"
[1] "Trail 7"
[1] "Trail 8"
[1] "Trail 9"
[1] "Trail 10"
[1] "Trail 11"
[1] "Trail 12"
[1] "Trail 13"
[1] "Trail 14"
[1] "Trail 15"
[1] "Trail 16"
[1] "Trail 17"
[1] "Trail 18"
[1] "Trail 19"
[1] "Trail 20"
[1] "Trail 21"
[1] "Trail 22"
[1] "Trail 23"
[1] "Trail 24"
[1] "Trail 25"
[1] "Trail 26"
[1] "Trail 27"
[1] "Trail 28"
[1] "Trail 29"
[1] "Trail 30"
[1] "Trail 31"
[1] "Trail 32"
[1] "Trail 33"
[1] "Trail 34"
[1] "Trail 35"
[1] "Trail 36"
[1] "Trail 37"
[1] "Trail 38"
[1] "Trail 39"
[1] "Trail 40"
[1] "Trail 41"
[1] "Trail 42"
[1] "Trail 43"
[1] "Trail 44"
[1] "Trail 45"
[1] "Trail 46"
[1] "Trail 47"
[1] "Trail 48"
[1] "Trail 49"
[1] "Trail 50"
[1] "Trail 51"
[1] "Trail 52"
[1] "Trail 53"
[1] "Trail 54"
[1] "Trail 55"
[1] "Trail 56"
[1] "Trail 57"
[1] "Trail 58"
[1] "Trail 59"
[1] "Trail 60"
[1] "Trail 61"
[1] "Trail 62"
[1] "Trail 63"
[1] "Trail 64"
[1] "Trail 65"
[1] "Trail 66"
[1] "Trail 67"
[1] "Trail 68"
[1] "Trail 69"
[1] "Trail 70"
[1] "Trail 71"
[1] "Trail 72"
[1] "Trail 73"
[1] "Trail 74"
[1] "Trail 75"
[1] "Trail 76"
[1] "Trail 77"
[1] "Trail 78"
[1] "Trail 79"
[1] "Trail 80"
[1] "Trail 81"
[1] "Trail 82"
[1] "Trail 83"
[1] "Trail 84"
[1] "Trail 85"
[1] "Trail 86"
[1] "Trail 87"
[1] "Trail 88"
[1] "Trail 89"
[1] "Trail 90"
[1] "Trail 91"
[1] "Trail 92"
[1] "Trail 93"
[1] "Trail 94"
[1] "Trail 95"
[1] "Trail 96"
[1] "Trail 97"
[1] "Trail 98"
[1] "Trail 99"
[1] "Trail 100"
[1] 5455.547