# Build df1: one row per member/category pair, numbered within each member.
df1 <- orderData |>
  group_by(member_no) |>
  # arrange() does not sort by group here, so rows are ordered by Year_month
  # across the whole table; the member grouping is kept for the steps below.
  arrange(Year_month) |>
  # A category bought repeatedly by the same member is kept only once
  # (the first row in Year_month order survives).
  distinct(member_no, category, .keep_all = TRUE) |>
  # Running item index inside each member.
  mutate(item_id = row_number()) |>
  ungroup() |>
  select(member_no, Year_month, item_id, category) |>
  # Store the member id and the category as factors.
  mutate(
    member_no = as.factor(member_no),
    category = as.factor(category)
  ) |>
  # Sort rows by member_no in ascending (factor-level) order.
  arrange(member_no)

# handle the special case where one person purchased multiple products on the same date  --------
# If a person purchased several products on the same date, the products are
# merged into a single basket string such as "A,B" on one row; otherwise
# cspade will throw an error.
#
# The person-date pair is used directly as the grouping key. Pasting the two
# values into one space-separated string and splitting it again would corrupt
# any member_no / Year_month value that itself contains a space, and would
# turn both columns into character (so dates would sort as text and the
# member_no factor would be lost).
df2 <- df1 %>%
  dplyr::group_by(member_no, Year_month) %>%
  dplyr::summarise(
    product = paste(category, collapse = ","),
    .groups = "drop"
  ) %>%
  # Chronological order within each person, persons kept contiguous, so that
  # item_id follows purchase order.
  dplyr::arrange(member_no, Year_month) %>%
  dplyr::group_by(member_no) %>%
  # Create item ID within person ID
  dplyr::mutate(item_id = dplyr::row_number()) %>%
  dplyr::ungroup() %>%
  dplyr::select(member_no, Year_month, item_id, product)

# Persist the basket table so the cspade section can be re-run on its own.
save(df2, file = "df2.Rda")

# c-spade pre-process -----------------------------------------------------

load("df2.Rda")

# Items that take part in the analysis; any other value is dropped from its
# basket.
item_levels <- c("A", "B", "C")

# `product` holds comma-separated baskets such as "A,B". Converting that
# string straight to a factor with the single-item levels would turn every
# multi-item basket into NA (an empty transaction), so each basket is split
# into its individual items first.
baskets <- lapply(
  strsplit(as.character(df2$product), ",", fixed = TRUE),
  function(items) unique(items[items %in% item_levels])
)
# Name the baskets so the transactions carry one info row per basket.
names(baskets) <- seq_along(baskets)

# Coercing a list of item vectors yields plain item labels ("A", "B", ...),
# so no "items=" prefix has to be stripped afterwards.
sessions <- as(baskets, "transactions")
# cspade reads the sequence (person) and event (order within person) ids from
# the transaction info.
transactionInfo(sessions)$sequenceID <- as.factor(df2$member_no)
transactionInfo(sessions)$eventID <- df2$item_id

# cspade ------------------------------------------------------------------

# Mine frequent sequences from the prepared transactions with a minimum
# support of 0.0001 and verbose output switched off.
itemsets <- cspade(
  data = sessions,
  parameter = list(support = 1e-04),
  control = list(verbose = FALSE)
)

# Print the mined sequences.
inspect(itemsets)
