fork download
  1. library(dplyr)
  2. ## Demo --
  3. ## Target Matrix ----
  4. MatTar <- matrix(c(1:3,
  5. 2:3, 1,
  6. 3, 1:2,
  7. 12,3:4), 4, 3, byrow = T)
  8. MatTar
  9. ## Data Matrix ----
  10. N <- 1e+5
  11. P <- 3
  12. set.seed(2022)
  13. MatData <- matrix(sample(1:36, size = N*P, replace = T), N ,P)
  14. MatData[1:6,] # first 6 rows
  15.  
  16.  
  17. ## Vector-String approach
  18. MatTar_to_Vec <- MatTar %>% apply(X = ., MARGIN = 1, FUN = function(X) as.character(list(as.character(X))))
  19. MatTar_to_Vec
  20.  
  21. idx_1 <- MatData %>%
  22. apply(X = ., MARGIN = 1, FUN = function(X) as.character(list(as.character(X)))) %>%
  23. is.element(., MatTar_to_Vec) %>% which
  24.  
  25.  
  26. ## Paste_locka approach ----
  27. MatTar_to_Vec <- MatTar %>% apply(X = ., MARGIN = 1, FUN = function(X) paste(X, collapse = ""))
  28. MatTar_to_Vec
  29.  
  30. idx_2 <- MatData %>%
  31. apply(X = ., MARGIN = 1, FUN = function(X) paste(X, collapse = "")) %>%
  32. is.element(., MatTar_to_Vec) %>% which
  33.  
  34. ## Vectorize_and_Wush978 approach ----
  35. idx_3 <- MatData %>% apply(X = ., MARGIN = 1, FUN = function(X) prod(colSums(abs(X - t(MatTar))))== 0 ) %>% which
  36.  
  37.  
  38. ## Result ----
  39. dimnames(MatData) <- list(paste("Row",1:N), paste("Col",1:P))
  40. # Demo_1
  41. print('Demo_1')
  42. idx_1 %>% '['(MatData, .,)
  43. # Demo_2
  44. print('Demo_2')
  45. idx_2 %>% '['(MatData, .,)
  46. print('Demo_3')
  47. # Demo_3
  48. idx_3 %>% '['(MatData, .,)
  49.  
  50.  
  51. # 1.Some bugs in Demo_2
  52. ## Row 18228 1 2 34
  53. ## Row 20352 1 23 4
  54. ## Row 62305 1 2 34
  55.  
  56. # 2.Demo_3: String matrix does NOT adapted
  57.  
  58. # 3.Demo_1: Better
  59.  
  60. ## benchmark ----
  61. # library(dplyr)
  62. # library(ggplot2)
  63. # library(microbenchmark)
  64. #
  65. # MatTar <- matrix(c(1:3,
  66. # 2:3, 1,
  67. # 3, 1:2,
  68. # 12,3:4), 4, 3, byrow = T)
  69. # N <- 1e+5
  70. # P <- 3
  71. # set.seed(2022)
  72. # MatData <- matrix(sample(1:36, size = N*P, replace = T), N ,P)
  73. #
  74. # mbm <- microbenchmark(
  75. # "Demo_1" = {
  76. # MatTar_to_Vec <- MatTar %>% apply(X = ., MARGIN = 1, FUN = function(X) as.character(list(as.character(X))))
  77. #
  78. # idx_1 <- MatData %>%
  79. # apply(X = ., MARGIN = 1, FUN = function(X) as.character(list(as.character(X)))) %>%
  80. # is.element(., MatTar_to_Vec) %>% which
  81. #
  82. # },
  83. # "Demo_2" = {
  84. # MatTar_to_Vec <- MatTar %>% apply(X = ., MARGIN = 1, FUN = function(X) paste(X, collapse = ""))
  85. # MatTar_to_Vec
  86. #
  87. # idx_2 <- MatData %>%
  88. # apply(X = ., MARGIN = 1, FUN = function(X) paste(X, collapse = "")) %>%
  89. # is.element(., MatTar_to_Vec) %>% which
  90. # },
  91. # "Demo_3" = {
  92. # idx_3 <- MatData %>% apply(X = ., MARGIN = 1, FUN = function(X) prod(colSums(abs(X - t(MatTar))))== 0 ) %>% which
  93. # },
  94. # times = 100
  95. )
  96. # mbm
  97. # autoplot(mbm)
  98.  
  99. # mbm
  100. # Unit: milliseconds
  101. # expr min lq mean median uq max neval
  102. # Demo_1 656.6060 696.4229 728.8200 728.2004 757.0074 857.5347 100
  103. # Demo_2 499.8685 585.9709 614.7522 617.6975 648.6348 758.6131 100
  104. # Demo_3 697.8651 867.4010 912.0625 934.6069 966.6985 1092.6792 100
Success #stdin #stdout #stderr 4.44s 79888KB
stdin
Standard input is empty
stdout
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    2    3    1
[3,]    3    1    2
[4,]   12    3    4
     [,1] [,2] [,3]
[1,]   30   21   10
[2,]   24   17   22
[3,]    5   20   27
[4,]   20   16    4
[5,]    7   22   28
[6,]   23    1   16
[1] "c(\"1\", \"2\", \"3\")"  "c(\"2\", \"3\", \"1\")" 
[3] "c(\"3\", \"1\", \"2\")"  "c(\"12\", \"3\", \"4\")"
[1] "123"  "231"  "312"  "1234"
[1] "Demo_1"
          Col 1 Col 2 Col 3
Row 846       1     2     3
Row 16701     3     1     2
Row 26895    12     3     4
Row 28363     1     2     3
Row 36501    12     3     4
Row 37930    12     3     4
Row 42662     2     3     1
Row 47974     1     2     3
Row 56675     1     2     3
Row 59839    12     3     4
Row 60785     1     2     3
[1] "Demo_2"
          Col 1 Col 2 Col 3
Row 846       1     2     3
Row 2686      1     2    34
Row 16701     3     1     2
Row 18807     1     2    34
Row 26895    12     3     4
Row 28363     1     2     3
Row 32866     1     2    34
Row 36501    12     3     4
Row 37930    12     3     4
Row 42662     2     3     1
Row 45073     1     2    34
Row 47974     1     2     3
Row 56675     1     2     3
Row 59839    12     3     4
Row 60785     1     2     3
Row 60892     1     2    34
Row 90811     1    23     4
[1] "Demo_3"
          Col 1 Col 2 Col 3
Row 846       1     2     3
Row 16701     3     1     2
Row 26895    12     3     4
Row 28363     1     2     3
Row 36501    12     3     4
Row 37930    12     3     4
Row 42662     2     3     1
Row 47974     1     2     3
Row 56675     1     2     3
Row 59839    12     3     4
Row 60785     1     2     3
stderr
Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

Error: unexpected ')' in ")"
Execution halted