fork download
  # Sequential pattern mining of per-member purchase histories with cSPADE.
  #
  # `orderData` is not created in this script; it must already exist in the
  # session and provide the columns member_no, Year_month and category.

  # Attach the packages up front: without them the script aborts at the first
  # pipe with `could not find function "%>%"`.
  library(dplyr)            # %>%, group_by(), arrange(), summarise(), ...
  library(arules)           # "transactions" class, transactionInfo(), inspect()
  library(arulesSequences)  # cspade()

  # one row per member and category ------------------------------------------

  df1 <- orderData %>%
    group_by(member_no) %>%
    arrange(Year_month) %>%
    # Remove instances where the same product appears repeatedly; after the
    # sort above, the row kept is the earliest one by Year_month
    distinct(member_no, category, .keep_all = TRUE) %>%
    # Create item ID within customer ID
    mutate(item_id = row_number()) %>%
    select(member_no, Year_month, item_id, category) %>%
    ungroup() %>%
    # Convert the identifier columns to factor (argument spelled out as `.fns`
    # instead of relying on partial matching of `.f`)
    mutate(across(.cols = c(member_no, category), .fns = as.factor)) %>%
    arrange(member_no) # ascending order by member

  # handle the special case where one person purchased multiple products on
  # the same date ------------------------------------------------------------

  # If a person purchased multiple products on the same date, those products
  # are merged into one basket such as "A,B" on a single row; otherwise cspade
  # will throw an error.
  # Grouping on the two real columns replaces the former paste0()/word() key,
  # which broke whenever either value contained a space and silently turned
  # both columns into character.
  df2 <- df1 %>%
    group_by(member_no, Year_month) %>%
    summarise(product = paste(category, collapse = ","), .groups = "drop") %>%
    # Create item ID within person ID, numbering baskets in Year_month order
    group_by(member_no) %>%
    arrange(Year_month, .by_group = TRUE) %>%
    mutate(item_id = row_number()) %>%
    ungroup() %>%
    select(member_no, Year_month, item_id, product)

  save(df2, file = "df2.Rda")

  # c-spade pre-process ------------------------------------------------------

  load("df2.Rda")

  # Split every basket back into its items so that "A,B" becomes the two items
  # A and B. Recoding product with factor(levels = c("A", "B", "C")) mapped
  # each multi-item basket to NA, discarding exactly the rows merged above.
  # NOTE(review): item labels now come from the data instead of the fixed
  # A/B/C list; filter `category` beforehand if only those three are wanted.
  baskets <- strsplit(df2$product, ",", fixed = TRUE)

  sessions <- as(baskets, "transactions")
  # df2 is already sorted by member_no and then item_id (see the arrange()
  # call above), so the identifiers are attached in that order
  transactionInfo(sessions)$sequenceID <- as.factor(df2$member_no)
  transactionInfo(sessions)$eventID <- df2$item_id

  # cspade -------------------------------------------------------------------

  itemsets <- cspade(
    sessions,
    parameter = list(support = 0.0001),
    control = list(verbose = FALSE)
  )

  inspect(itemsets)
  57.  
Success #stdin #stdout #stderr 0.25s 40884KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Error: could not find function "%>%"
Execution halted