fork(1) download
  1. #pragma GCC optimize("Ofast")
  2. #pragma GCC target("avx")
  3.  
  4. #include <iostream>
  5. #include <iomanip>
  6. #include <algorithm>
  7. #include <assert.h>
  8. #include <immintrin.h>
  9.  
  10. struct SqrtDecomposition {
  11.  
  12. const int nGroups;
  13.  
  14. float *gcnt;
  15.  
  16. float *data;
  17.  
  18. static const int gsize = 256;
  19.  
  20. SqrtDecomposition(const int n) : nGroups((n + gsize - 1) / gsize) {
  21. gcnt = (float*)_mm_malloc(nGroups * sizeof(float), 32);
  22. data = (float*)_mm_malloc(n * sizeof(float), 32);
  23. std::fill(data, data + n, 0);
  24. std::fill(gcnt, gcnt + nGroups, 0);
  25. }
  26.  
  27. void inc(int pos, int delta) {
  28. gcnt[pos / gsize] += (float)delta;
  29. data[pos] += (float)delta;
  30. }
  31.  
  32. ~SqrtDecomposition() {
  33. _mm_free(data);
  34. _mm_free(gcnt);
  35. }
  36.  
  37. };
  38.  
  39. namespace Solution {
  40.  
  41. const int NMAX = 100000;
  42.  
  43. const int blockSize = 1024;
  44.  
  45. const int MaxNBlocks = (NMAX + blockSize - 1) / blockSize;
  46.  
  47. SqrtDecomposition* cntTillBorder[MaxNBlocks];
  48.  
  49. SqrtDecomposition* zeroBuffer;
  50.  
  51. float* arr;
  52.  
  53. int n, nBlocks;
  54.  
  55. void alloc(int n_) {
  56. n = n_;
  57. nBlocks = (n + blockSize - 1) / blockSize;
  58. arr = (float*)_mm_malloc(n * sizeof(float), 32);
  59. zeroBuffer = new SqrtDecomposition(n);
  60. for (int i = 0; i < nBlocks; ++i) {
  61. cntTillBorder[i] = new SqrtDecomposition(n);
  62. }
  63. }
  64.  
  65. void free() {
  66. delete zeroBuffer;
  67. _mm_free(arr);
  68. for (int i = 0; i < nBlocks; ++i) {
  69. delete cntTillBorder[i];
  70. }
  71. }
  72.  
  73. void precalc() {
  74. for (int i = 0; i < n; ++i) {
  75. int value = (int)arr[i];
  76. for (int b = i / blockSize; b < nBlocks; ++b) {
  77. cntTillBorder[b]->inc(value, +1);
  78. }
  79. }
  80. }
  81.  
  82. void naiveUpdateItems(int begin, int after, float x, float y) {
  83. int cnt = 0;
  84. for (int i = begin; i < after; ++i) {
  85. cnt += (arr[i] == x);
  86. arr[i] = (arr[i] == x) ? y : arr[i];
  87. }
  88. for (int b = begin / blockSize; b < nBlocks; ++b) {
  89. cntTillBorder[b]->inc(int(x), -cnt);
  90. cntTillBorder[b]->inc(int(y), +cnt);
  91. }
  92. }
  93.  
  94. void modify(int lt, int rt, float x, float y) {
  95. int bl = lt / blockSize;
  96. int br = rt / blockSize;
  97. if (bl == br) {
  98. naiveUpdateItems(lt, rt+1, x, y);
  99. return;
  100. }
  101. int changes = 0;
  102. for (int i = lt; i < (bl+1) * blockSize; ++i) {
  103. changes += (arr[i] == x);
  104. arr[i] = (arr[i] == x) ? y : arr[i];
  105. }
  106. cntTillBorder[bl]->inc(int(x), -changes);
  107. cntTillBorder[bl]->inc(int(y), +changes);
  108.  
  109. __m256 vx = _mm256_set1_ps(x);
  110. __m256 vy = _mm256_set1_ps(y);
  111.  
  112. for (int b = bl+1; b < br; ++b) {
  113. float *blockBegin = arr + b * blockSize;
  114. for (int i = 0; i + 31 < blockSize; i += 32) {
  115. uint32_t bitmask = 0;
  116. for (int j = 0; j < 32; j += 8) {
  117. __m256 va = _mm256_load_ps(blockBegin + i + j);
  118. __m256 rs = _mm256_cmp_ps(vx, va, _CMP_EQ_OQ);
  119. bitmask = (bitmask << 8) | _mm256_movemask_ps(rs);
  120. _mm256_maskstore_ps(blockBegin + i + j, _mm256_cvtps_epi32(rs), vy);
  121. }
  122. changes += __builtin_popcountll(bitmask);
  123. }
  124. cntTillBorder[b]->inc(int(x), -changes);
  125. cntTillBorder[b]->inc(int(y), +changes);
  126. }
  127.  
  128. for (int i = br * blockSize; i <= rt; ++i) {
  129. changes += (arr[i] == x);
  130. arr[i] = (arr[i] == x) ? y : arr[i];
  131. }
  132. for (int b = br; b < nBlocks; ++b) {
  133. cntTillBorder[b]->inc(int(x), -changes);
  134. cntTillBorder[b]->inc(int(y), +changes);
  135. }
  136.  
  137. }
  138.  
  139. int nth_element(int lt, int rt, int k) {
  140. int bl = lt / blockSize;
  141. int br = rt / blockSize;
  142. SqrtDecomposition* arrLT = (bl == 0) ? zeroBuffer : cntTillBorder[bl-1];
  143. SqrtDecomposition* arrRT = cntTillBorder[br];
  144. for (int i = rt+1; i < std::min((br+1) * blockSize, n); ++i) { arrRT->inc((int)arr[i], -1); }
  145. for (int i = bl * blockSize; i < lt; ++i) { arrLT->inc((int)arr[i], +1); }
  146. int last = -1;
  147. static const int STEP = SqrtDecomposition::gsize;
  148. for (int g = 0; g < arrLT->nGroups; ++g) {
  149. int cnt = int(arrRT->gcnt[g] - arrLT->gcnt[g]);
  150. if (cnt >= k) { last = g * STEP - 1; break; }
  151. k -= cnt;
  152. last = g * STEP + STEP - 1;
  153. }
  154.  
  155. int cnt = 0;
  156. while (cnt < k) { last++; cnt += int(arrRT->data[last] - arrLT->data[last]); }
  157. for (int i = bl * blockSize; i < lt; ++i) { arrLT->inc((int)arr[i], -1); }
  158. for (int i = rt+1; i < std::min((br+1) * blockSize, n); ++i) { arrRT->inc((int)arr[i], +1); }
  159. return last;
  160. }
  161. }
  162.  
  163. char getChar() {
  164. static const int SIZE = 1 << 16;
  165. static char buffer[SIZE];
  166. static int pos = 0;
  167. static int size = 0;
  168. if (pos == size) {
  169. size = (int)fread(buffer, 1, SIZE, stdin),
  170. pos = 0;
  171. }
  172. if (pos == size) {
  173. return EOF;
  174. }
  175. return buffer[pos++];
  176. }
  177.  
  178. template<typename T>
  179. T getInt() {
  180. char c = '?';
  181. while (!(c == '-' || ('0' <= c && c <= '9'))) { c = getChar(); }
  182. bool pos = true;
  183. if (c == '-') { pos = false; c = getChar(); }
  184. T ret = 0;
  185. while ('0' <= c && c <= '9') { (ret *= 10) += (c - '0'); c = getChar(); }
  186. return pos ? ret : -ret;
  187. }
  188.  
  189. void putChar(char c) {
  190. static const int SIZE = 1 << 16;
  191. static char buffer[SIZE];
  192. static int size = 0;
  193. if (size == SIZE || c == EOF) {
  194. fwrite(buffer, 1, size, stdout),
  195. size = 0;
  196. }
  197. if (c != EOF) { buffer[size++] = c; }
  198. }
  199.  
  200. template<typename T>
  201. void putInt(T value) {
  202. bool pos = true;
  203. if (value < 0) { pos = false; value = -value; }
  204. static char buf[24];
  205. int size = 0;
  206. do { buf[size++] = char(value % 10 + '0'); value /= 10; } while (value > 0);
  207. if (!pos) { buf[size++] = '-'; }
  208. while (size--) { putChar(buf[size]); }
  209. }
  210.  
  211. #include <random>
  212.  
  213. void stressTest() {
  214. int n = 100000, q = 100000;
  215. Solution::alloc(n);
  216. for (int i = 0; i < n; ++i) {
  217. Solution::arr[i] = float(n-1);
  218. }
  219. Solution::precalc();
  220. std::mt19937 gen;
  221. std::uniform_int_distribution<int> dist(0, n-1);
  222. int last = n-1;
  223. uint64_t sum = 0;
  224. for (int id = 0; id < q; ++id) {
  225. if (id % 2 == 0) {
  226. int l = dist(gen);
  227. int r = dist(gen);
  228. if (l > r) { std::swap(l, r); }
  229. auto res = Solution::nth_element(l, r, (r-l+1));
  230. assert(res == last);
  231. sum += res;
  232. } else {
  233. int x = last;
  234. int y = (n-1) + (n-2) - last;
  235. Solution::modify(0, n-1, x, y);
  236. last = y;
  237. }
  238. }
  239. assert(sum != 0);
  240. Solution::free();
  241. std::exit(0);
  242. }
  243.  
  244. int main() {
  245. //stressTest();
  246.  
  247. int n = getInt<int>(), q = getInt<int>();
  248. Solution::alloc(n);
  249. for (int i = 0; i < n; ++i) {
  250. Solution::arr[i] = (float)(getInt<int>()-1);
  251. }
  252. Solution::precalc();
  253.  
  254. while (q--) {
  255. int t = getInt<int>();
  256. if (t == 1) {
  257. int lt = getInt<int>()-1, rt = getInt<int>()-1, x = getInt<int>()-1, y = getInt<int>()-1;
  258. Solution::modify(lt, rt, (float)x, (float)y);
  259. } else {
  260. int lt = getInt<int>()-1, rt = getInt<int>()-1, k = getInt<int>();
  261. putInt(Solution::nth_element(lt, rt, k)+1);
  262. putChar('\n');
  263. assert(t == 2);
  264. }
  265. }
  266. putChar(EOF);
  267. Solution::free();
  268. return 0;
  269. }
Success #stdin #stdout 0s 15376KB
stdin
3 3
2 3 3
2 1 3 1
1 1 3 3 1
2 1 3 2
stdout
2
1