fork download
  1. library(data.table)
  2.  
  #' Greedily keep rows whose `x` and `y` values have not been used before.
  #'
  #' Rows are scanned from top to bottom. A row is kept when neither its `x`
  #' nor its `y` value occurs (in either column) in a row that was kept
  #' earlier; otherwise the row is dropped. Values are compared after
  #' conversion to character, so `1` in `x` and `"1"` in `y` count as equal.
  #'
  #' @param df A data.frame or data.table containing columns `x` and `y`.
  #'   Any further columns are ignored.
  #' @param verbose If `TRUE`, emit a progress message for every kept row.
  #' @return The kept rows of `df`, restricted to the columns `x` and `y`.
  myunique <- function(df, verbose = FALSE) {
    # A data.table is also a data.frame, so a single check covers both inputs.
    if (!is.data.frame(df)) {
      stop("Input should be a data.frame or a data.table")
    }
    if (!all(c("x", "y") %in% names(df))) {
      stop("Input should contain columns 'x' and 'y'")
    }

    n_rows <- nrow(df)
    x_chr <- as.character(df[["x"]])
    y_chr <- as.character(df[["y"]])

    # Give every distinct value one integer id shared by both columns, so the
    # "already used" lookup below is a plain vector index.
    values <- unique(c(x_chr, y_chr))
    x_id <- match(x_chr, values)
    y_id <- match(y_chr, values)

    used <- logical(length(values))
    keep <- logical(n_rows)
    # seq_len() keeps the loop empty for zero-row input.
    for (row in seq_len(n_rows)) {
      if (used[x_id[row]] || used[y_id[row]]) {
        next
      }
      if (verbose) message("current index:\t", row, "/", n_rows)
      keep[row] <- TRUE
      used[c(x_id[row], y_id[row])] <- TRUE
    }

    selected_idx <- which(keep)
    df[selected_idx, c("x", "y")]
  }
  51.  
  # Demo: build the six-row sample table and show it.
  cat("Input\n")
  df <- fread(
    input = "x, y
1, 2
1, 3
2, 4
2, 6
3, 2
3, 7",
    header = TRUE
  )
  print(df)

  # Show the rows kept by myunique(); the top-level call auto-prints its
  # result, and `verbose` is left at its default of FALSE.
  cat("Output\n")
  myunique(df)

  64.  
Success #stdin #stdout 0.46s 42520KB
stdin
Standard input is empty
stdout
Input
   x y
1: 1 2
2: 1 3
3: 2 4
4: 2 6
5: 3 2
6: 3 7
Output
   x y
1: 1 2
2: 3 7