library(data.table)
#' Greedily select rows whose `x`/`y` values are not shared with any
#' previously selected row.
#'
#' Rows are visited in their original order. The first row that has not yet
#' been skipped is selected; every row containing either of its two values
#' (in column `x` OR column `y`, compared as character strings) is then marked
#' as skipped. The scan continues until every row is selected or skipped.
#'
#' @param df A data.frame or data.table with columns `x` and `y`. Additional
#'   columns are allowed and ignored. The input is never modified.
#' @param verbose If `TRUE`, emit a `message()` for every selected row index.
#' @return The selected rows of `df`, restricted to columns `x` and `y`, in
#'   their original order and with their original column types.
myunique <- function(df, verbose = FALSE) {
  # is.data.frame() is also TRUE for data.table, so one check covers both.
  if (!is.data.frame(df)) {
    stop("Input should be a data.frame or a data.table")
  }
  if (!all(c("x", "y") %in% names(df))) {
    stop("Input must contain columns 'x' and 'y'")
  }

  n <- nrow(df)
  x <- as.character(df[["x"]])
  y <- as.character(df[["y"]])

  # Intern every distinct value (from either column) as an integer id.
  # match() pairs NA with NA, so rows sharing a missing value are linked too.
  values <- c(x, y)
  distinct_values <- unique(values)
  value_id <- match(values, distinct_values)
  x_id <- value_id[seq_len(n)]
  y_id <- value_id[n + seq_len(n)]

  # Dictionary value id -> row indices that contain the value in x or y.
  # Explicit levels guarantee that list position k belongs to value id k.
  rows_by_value <- split(
    rep(seq_len(n), 2L),
    factor(value_id, levels = seq_along(distinct_values))
  )

  skip <- logical(n)
  selected <- logical(n)
  # seq_len() keeps the loop empty for zero-row input. A single forward pass
  # is enough: a selected row always marks itself as skipped, so the first
  # non-skipped index only ever moves forward.
  for (k in seq_len(n)) {
    if (skip[k]) {
      next
    }
    if (verbose) message("current index:\t", k, "/", n)
    selected[k] <- TRUE
    skip[rows_by_value[[x_id[k]]]] <- TRUE
    skip[rows_by_value[[y_id[k]]]] <- TRUE
  }

  df[which(selected), c("x", "y")]
}
# Demo: build a small edge list, show it, then show the rows kept by
# myunique().
cat("Input\n")
# fread() treats a string containing newlines as inline data; the first line
# is the header and surrounding whitespace is stripped from fields.
df <- fread("x, y
1, 2
1, 3
2, 4
2, 6
3, 2
3, 7", header = TRUE)
print(df)
cat("Output\n")
# Print explicitly so the result is shown even when the file is source()d.
print(myunique(df, verbose = FALSE))
bGlicmFyeShkYXRhLnRhYmxlKQoKbXl1bmlxdWUgPC0gZnVuY3Rpb24oZGYsIHZlcmJvc2UgPSBGQUxTRSkgewogICAgaWYgKGlzLmRhdGEuZnJhbWUoZGYpKSB7CiAgICAgICAgZHQgPC0gZGF0YS50YWJsZShkZikKICAgIH0gZWxzZSBpZiAoaXMuZGF0YS50YWJsZShkZikpIHsKICAgICAgICBkdCA8LSBjb3B5KGRmKQogICAgfSBlbHNlIHsKICAgICAgICBzdG9wKCJJbnB1dCBzaG91bGQgYmUgYSBkYXRhLmZyYW1lIG9yIGEgZGF0YS50YWJsZSIpCiAgICB9CiAgICAKICAgIGR0WywgaSA6PSAxOi5OXQogICAgZHRbLCB4IDo9IGFzLmNoYXJhY3Rlcih4KV0KICAgIGR0WywgeSA6PSBhcy5jaGFyYWN0ZXIoeSldCiAgICBkdF9hbGwgPC0gcmJpbmQoCiAgICAgICAgZHQsCiAgICAgICAgZHRbLCAuKHggPSB5LCB5ID0geCwgaSA9IGkpXQogICAgKQogICAgCiAgICAjIGEgZGljdGlvbmFyeSB4KGNoYXJhY3RlcikgLT4gaW5kZXgoaW50ZWdlcikKICAgIGR0X2cxIDwtIGR0X2FsbFssIC4oaSA9IGxpc3QoaSkpLCBieSA9IHhdCiAgICBzZXRrZXkoZHRfZzEsIHgpCgogICAgIyBhIGRpY3Rpb25hcnkgaW5kZXgoaW50ZWdlcikgLT4geChjaGFyYWN0ZXIpCiAgICBkdF9nMiA8LSBkdF9hbGxbLCAuKHggPSBsaXN0KHgpKSwgYnkgPSBpXQogICAgZHRfZzJbLCBza2lwICAgOj0gRkFMU0VdCiAgICBkdF9nMlssIHNlbGVjdCA6PSBGQUxTRV0KICAgIHNldGtleShkdF9nMiwgaSkKICAgIAogICAgd2hpbGUgKFRSVUUpIHsKICAgICAgICBpZiAobGVuZ3RoKHdoaWNoKCFkdF9nMiRza2lwKSkgPiAwKSB7CiAgICAgICAgICAgICMgc2VsZWN0IGZpcnN0IG5vbi1za2lwcGVkIGluZGV4CiAgICAgICAgICAgIGN1cl9pICA8LSBkdF9nMlssIG1pbih3aGljaCghc2tpcCkpXQogICAgICAgICAgICBpZih2ZXJib3NlKSBtZXNzYWdlKCJjdXJyZW50IGluZGV4Olx0IiwgY3VyX2ksICIvIiwgbnJvdyhkdF9nMikpCiAgICAgICAgICAgIGR0X2cyW2N1cl9pLCBzZWxlY3QgOj0gVFJVRV0KICAgICAgICAgICAgCiAgICAgICAgICAgICMgZ2V0IHNraXBwZWQgaW5kZXggdmVjdG9yCiAgICAgICAgICAgIGN1cl94ICA8LSBkdF9nMltjdXJfaSwgeFtbMV1dXQogICAgICAgICAgICBza2lwX2kgPC0gZHRfZzFbY3VyX3gsIHVuaXF1ZShkby5jYWxsKGMsIGkpKV0KICAgICAgICAgICAgZHRfZzJbc2tpcF9pLCBza2lwIDo9IFRSVUVdCiAgICAgICAgfSBlbHNlIHsKICAgICAgICAgICAgYnJlYWsKICAgICAgICB9CiAgICB9CiAgICAKICAgICMgcmV0dXJuIHNlbGVjdGVkIHJvdwogICAgc2VsZWN0ZWRfaWR4IDwtIGR0X2cyW3NlbGVjdCA9PSBUUlVFLCBpXQogICAgCiAgICByZXR1cm4oZGZbc2VsZWN0ZWRfaWR4LCBjKCJ4IiwgInkiKV0pCn0gCgpjYXQoIklucHV0XG4iKQpkZiA8LSBmcmVhZCgieCwgeQoJMSwgMgoJMSwgMwoJMiwgNAoJMiwgNgoJMywgMgoJMywgNyIsIGhlYWRlciA9IFRSVUUpCnByaW50KGRmKQoKY2F0KCJPdXRwdXRcbiIpCm15dW5pcXVlKGRm
LCB2ZXJib3NlID0gRkFMU0UpCg==