# Build df1: one row per (member, category), keeping the earliest period in
# which each member bought each category.
df1 <- orderData %>%
  group_by(member_no) %>%
  # arrange() ignores grouping by default, so this sorts the whole table by
  # period; the per-member order it produces is what the next steps rely on
  arrange(Year_month) %>%
  # Remove instances where the same category appears repeatedly for a member
  # (the first, i.e. earliest, row is the one kept)
  distinct(member_no, category, .keep_all = TRUE) %>%
  # Create item ID within member ID, following the period order set above
  mutate(item_id = row_number()) %>%
  select(member_no, Year_month, item_id, category) %>%
  ungroup() %>%
  # Convert the id and category columns to factors
  # (`.fns` spelled out in full instead of relying on partial matching of `.f`)
  mutate(across(.cols = c("member_no", "category"), .fns = as.factor)) %>%
  # Sort by member in ascending factor-level order; the sort is stable, so the
  # period order within each member is preserved
  arrange(member_no)
# handle the special case where one person purchased multiple products on the same date --------
# If a person purchased multiple products in the same period, the products are
# merged into one comma-separated basket (e.g. "A,B") on a single row, because
# cspade needs at most one row per (sequence, event) pair.
#
# Grouping directly on both keys replaces the earlier "paste the keys into one
# string, then split it again with word()" round-trip, which truncated any
# member_no or Year_month value that itself contained a space.
df2 <- df1 %>%
  dplyr::group_by(member_no, Year_month) %>%
  dplyr::summarise(product = paste(category, collapse = ","), .groups = "drop") %>%
  # Keep both keys as character columns, as in the previously saved df2.Rda.
  # NOTE(review): a missing key now stays NA instead of becoming the string "NA".
  dplyr::mutate(
    member_no = as.character(member_no),
    Year_month = as.character(Year_month)
  ) %>%
  group_by(member_no) %>%
  arrange(Year_month) %>%
  mutate(item_id = row_number()) %>% # Create event ID within person ID
  select(member_no, Year_month, item_id, product) %>%
  ungroup() %>%
  # Stable sort: rows end up ordered by member, then by item_id within member
  arrange(member_no)

# Persist the prepared table; the object is stored under the name `df2`
save(df2, file = "df2.Rda")
# c-spade pre-process -----------------------------------------------------
load("df2.Rda") # restores the object `df2`

# Items the analysis is restricted to; anything else is dropped from baskets.
item_levels <- c("A", "B", "C")

# `product` holds comma-joined baskets such as "A,B". Converting it with
# factor(levels = item_levels) turned every multi-item basket into NA and
# silently discarded those events, so each basket is instead split back into
# its individual items and the list of item vectors is coerced to transactions.
baskets <- lapply(
  strsplit(as.character(df2$product), ",", fixed = TRUE),
  intersect,
  item_levels
)
sessions <- as(baskets, "transactions")

# cspade identifies sequences and events through these two transactionInfo
# columns; rows of df2 are already ordered by member and then by item_id.
transactionInfo(sessions)$sequenceID <- as.factor(df2$member_no)
transactionInfo(sessions)$eventID <- df2$item_id
# Item labels are the plain category names here, so no "items=" prefix has to
# be stripped.

# cspade ------------------------------------------------------------------
itemsets <- cspade(
  sessions,
  parameter = list(support = 0.0001),
  control = list(verbose = FALSE)
)
inspect(itemsets)
ZGYxIDwtIG9yZGVyRGF0YSAlPiUgCiAgZ3JvdXBfYnkobWVtYmVyX25vKSAlPiUgCiAgYXJyYW5nZShZZWFyX21vbnRoKSAlPiUgCiAgI1JlbW92ZSBJbnN0YW5jZXMgd2hlcmUgdGhlIHNhbWUgcHJvZHVjdCBhcHBlYXJzIHJlcGVhdGVkbHkKICBkaXN0aW5jdChtZW1iZXJfbm8sIGNhdGVnb3J5LCAua2VlcF9hbGwgPSBUUlVFKSAlPiUKICAjQ3JlYXRlIEl0ZW0gSUQgV2l0aGluIEN1c3RvbWVyIElECiAgbXV0YXRlKGl0ZW1faWQgPSByb3dfbnVtYmVyKCkpICU+JSAKICBzZWxlY3QobWVtYmVyX25vLCBZZWFyX21vbnRoLCBpdGVtX2lkLCBjYXRlZ29yeSkgJT4lIAogIHVuZ3JvdXAoKSAlPiUgCiAgI0NvbnZlcnQgRXZlcnl0aGluZyB0byBGYWN0b3IKICBtdXRhdGUoYWNyb3NzKC5jb2xzID0gYygibWVtYmVyX25vIiwgImNhdGVnb3J5IiksIC5mID0gYXMuZmFjdG9yKSkKCmRmMSA8LSBkZjFbb3JkZXIoZGYxJG1lbWJlcl9ubyksXSAjIGRlc2NlbmRpbmcgb3JkZXIKCiMgaGFuZGxlIHRoZSBzcGVjaWFsIGNhc2Ugd2hlcmUgb25lIHBlcnNvbiBwdXJjaGFzZWQgbXVsdGlwbGUgcHJvZHVjdHMgb24gdGhlIHNhbWUgZGF0ZSAgLS0tLS0tLS0KZGYyIDwtIGRmMQojIGNyZWF0ZSB1bmlxdWUgaWQgZm9yIGVhY2ggcGVyc29uLWRhdGUgcGFpcgpkZjIkdW5pcXVlIDwtIHBhc3RlMChhcy5jaGFyYWN0ZXIoZGYyJG1lbWJlcl9ubyksIiAiLCBhcy5jaGFyYWN0ZXIoZGYyJFllYXJfbW9udGgpKSAKZGYyIDwtIGRmMiAlPiUgCiAgIyBpZiBhIHBlcnNvbiBwdXJjaGFzZWQgbXVsdGlwbGUgcHJvZHVjdHMgb24gdGhlIHNhbWUgZGF0ZSwgCiAgIyB3ZSBuZWVkIHRvIG1lcmdlIHRoZXNlIHByb2R1Y3RzIGludG8gYSBiYXNrZXQgbGlrZSAoQSxCKSBvbiBhIHNpbmdsZSByb3cKICAjIG90aGVyd2lzZSwgY3NwYWRlIHdpbGwgdGhyb3cgYW4gZXJyb3IgCiAgZHBseXI6Omdyb3VwX2J5KHVuaXF1ZSkgJT4lCiAgZHBseXI6OnN1bW1hcmlzZShwcm9kdWN0ID0gcGFzdGUoY2F0ZWdvcnksIGNvbGxhcHNlID0gIiwiKSkKCmRmMiRtZW1iZXJfbm8gPC0gd29yZChkZjIkdW5pcXVlLCAxKSAjIHJlc3RvcmUgcGVyc29uIGlkIHRoYXQgd2FzIGxvc3QgaW4gdGhlIGxhc3Qgc3RlcApkZjIkWWVhcl9tb250aCA8LSB3b3JkKGRmMiR1bmlxdWUsIDIpICAjIHJlc3RvcmUgZmlsbCBkYXRlIHRoYXQgd2FzIGxvc3QgaW4gdGhlIGxhc3Qgc3RlcAoKZGYyIDwtIGRmMiAlPiUgCiAgZ3JvdXBfYnkobWVtYmVyX25vKSAlPiUgCiAgYXJyYW5nZShZZWFyX21vbnRoKSAlPiUgCiAgbXV0YXRlKGl0ZW1faWQgPSByb3dfbnVtYmVyKCkpICU+JSAjQ3JlYXRlIEl0ZW0gSUQgV2l0aGluIHBlcnNvbiBJRAogIHNlbGVjdChtZW1iZXJfbm8sIFllYXJfbW9udGgsIGl0ZW1faWQsIHByb2R1Y3QpICU+JSAKICB1bmdyb3VwKCkKCmRmMiA8LSBkZjIgJT4lIGFycmFuZ2UobWVtYmVyX25vKQoKc2F2ZShkZjIsZmlsZT0iZGYyLlJkYSIpCgojIGMtc3BhZGUgcHJlLXByb2Nlc3MgLS0t
LS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KCmxvYWQoImRmMi5SZGEiKQpkZjIkcHJvZHVjdCA8LSBmYWN0b3IoZGYyJHByb2R1Y3QsIGxldmVscyA9IGMoJ0EnLCAnQicsICdDJykpCgpzZXNzaW9ucyA8LSAgYXMoZGYyICU+JSB0cmFuc211dGUoaXRlbXMgPSBwcm9kdWN0KSwgInRyYW5zYWN0aW9ucyIpCnRyYW5zYWN0aW9uSW5mbyhzZXNzaW9ucykkc2VxdWVuY2VJRCA8LSBkZjIkbWVtYmVyX25vIHw+IGFzLmZhY3RvcigpCnRyYW5zYWN0aW9uSW5mbyhzZXNzaW9ucykkZXZlbnRJRCA8LSBkZjIkaXRlbV9pZAppdGVtTGFiZWxzKHNlc3Npb25zKSA8LSBzdHJfcmVwbGFjZV9hbGwoaXRlbUxhYmVscyhzZXNzaW9ucyksICJpdGVtcz0iLCAiIikKCiMgY3NwYWRlIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQoKaXRlbXNldHMgPC0gY3NwYWRlKHNlc3Npb25zLCAKICAgICAgICAgICAgICAgICAgIHBhcmFtZXRlciA9IGxpc3Qoc3VwcG9ydCA9IDAuMDAwMSksIAogICAgICAgICAgICAgICAgICAgY29udHJvbCA9IGxpc3QodmVyYm9zZSA9IEZBTFNFKSkKCmluc3BlY3QoKGl0ZW1zZXRzKSkK