fork(1) download
  1. #include <stdint.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <emmintrin.h>
  5. #include <math.h>
  6.  
  7. struct md5x4_context {
  8. __m128i k[64];
  9. __m128i iv[4];
  10. __m128i ff;
  11.  
  12. __m128i a, b, c, d;
  13. };
  14.  
  15. void md5x4_pre_init(struct md5x4_context* ctx) {
  16. for (uint32_t i = 0; i < 64; ++i) {
  17. uint32_t x = floor(fabs(sin(i + 1)) * (double)0x100000000ull);
  18. ctx->k[i] = _mm_set1_epi32(x);
  19. }
  20. ctx->iv[0] = _mm_set1_epi32(0x67452301);
  21. ctx->iv[1] = _mm_set1_epi32(0xefcdab89);
  22. ctx->iv[2] = _mm_set1_epi32(0x98badcfe);
  23. ctx->iv[3] = _mm_set1_epi32(0x10325476);
  24. ctx->ff = _mm_set1_epi32(0xFFFFFFFF);
  25. }
  26.  
  27. void md5x4_init(struct md5x4_context* ctx) {
  28. ctx->a = ctx->iv[0];
  29. ctx->b = ctx->iv[1];
  30. ctx->c = ctx->iv[2];
  31. ctx->d = ctx->iv[3];
  32. }
  33.  
  34. inline __m128i md5x4_fx(int s, __m128i a, __m128i b, __m128i k, __m128i x, __m128i f) {
  35. f = _mm_add_epi32(_mm_add_epi32(f, a), _mm_add_epi32(k, x));
  36. return _mm_add_epi32(b, _mm_or_si128(_mm_slli_epi32(f, s), _mm_srli_epi32(f, 32 - s)));
  37. }
  38.  
  39. inline __m128i md5x4_f1(int s, __m128i a, __m128i b, __m128i c, __m128i d, __m128i k, __m128i x) {
  40. return md5x4_fx(s, a, b, k, x, _mm_or_si128(_mm_and_si128(b, c), _mm_andnot_si128(b, d)));
  41. }
  42.  
  43. inline __m128i md5x4_f2(int s, __m128i a, __m128i b, __m128i c, __m128i d, __m128i k, __m128i x) {
  44. return md5x4_fx(s, a, b, k, x, _mm_or_si128(_mm_and_si128(d, b), _mm_andnot_si128(d, c)));
  45. }
  46.  
  47. inline __m128i md5x4_f3(int s, __m128i a, __m128i b, __m128i c, __m128i d, __m128i k, __m128i x) {
  48. return md5x4_fx(s, a, b, k, x, _mm_xor_si128(_mm_xor_si128(b, c), d));
  49. }
  50.  
  51. inline __m128i md5x4_f4(int s, __m128i a, __m128i b, __m128i c, __m128i d, __m128i k, __m128i x) {
  52. return md5x4_fx(s, a, b, k, x, _mm_xor_si128(c, _mm_or_si128(b, _mm_xor_si128(d, _mm_set1_epi32(0xFFFFFFFF)))));
  53. }
  54.  
  55. void md5x4_raw_update(struct md5x4_context* ctx, const uint8_t blocks[4][64])
  56. {
  57. __m128i x[16];
  58. for (size_t i = 0; i < 4; ++i) {
  59. __m128 x0 = _mm_loadu_ps((const float*)blocks[0] + i * 4);
  60. __m128 x1 = _mm_loadu_ps((const float*)blocks[1] + i * 4);
  61. __m128 x2 = _mm_loadu_ps((const float*)blocks[2] + i * 4);
  62. __m128 x3 = _mm_loadu_ps((const float*)blocks[3] + i * 4);
  63.  
  64. __m128 t0 = _mm_unpacklo_ps(x0, x1);
  65. __m128 t1 = _mm_unpackhi_ps(x0, x1);
  66. __m128 t2 = _mm_unpacklo_ps(x2, x3);
  67. __m128 t3 = _mm_unpackhi_ps(x2, x3);
  68.  
  69. x[i * 4 + 0] = _mm_castps_si128(_mm_movelh_ps(t0, t2));
  70. x[i * 4 + 1] = _mm_castps_si128(_mm_movehl_ps(t2, t0));
  71. x[i * 4 + 2] = _mm_castps_si128(_mm_movelh_ps(t1, t3));
  72. x[i * 4 + 3] = _mm_castps_si128(_mm_movehl_ps(t3, t1));
  73. }
  74.  
  75. __m128i a = ctx->a;
  76. __m128i b = ctx->b;
  77. __m128i c = ctx->c;
  78. __m128i d = ctx->d;
  79.  
  80. a = md5x4_f1(7, a, b, c, d, ctx->k[0], x[0]);
  81. d = md5x4_f1(12, d, a, b, c, ctx->k[1], x[1]);
  82. c = md5x4_f1(17, c, d, a, b, ctx->k[2], x[2]);
  83. b = md5x4_f1(22, b, c, d, a, ctx->k[3], x[3]);
  84. a = md5x4_f1(7, a, b, c, d, ctx->k[4], x[4]);
  85. d = md5x4_f1(12, d, a, b, c, ctx->k[5], x[5]);
  86. c = md5x4_f1(17, c, d, a, b, ctx->k[6], x[6]);
  87. b = md5x4_f1(22, b, c, d, a, ctx->k[7], x[7]);
  88. a = md5x4_f1(7, a, b, c, d, ctx->k[8], x[8]);
  89. d = md5x4_f1(12, d, a, b, c, ctx->k[9], x[9]);
  90. c = md5x4_f1(17, c, d, a, b, ctx->k[10], x[10]);
  91. b = md5x4_f1(22, b, c, d, a, ctx->k[11], x[11]);
  92. a = md5x4_f1(7, a, b, c, d, ctx->k[12], x[12]);
  93. d = md5x4_f1(12, d, a, b, c, ctx->k[13], x[13]);
  94. c = md5x4_f1(17, c, d, a, b, ctx->k[14], x[14]);
  95. b = md5x4_f1(22, b, c, d, a, ctx->k[15], x[15]);
  96.  
  97. a = md5x4_f2(5, a, b, c, d, ctx->k[16], x[1]);
  98. d = md5x4_f2(9, d, a, b, c, ctx->k[17], x[6]);
  99. c = md5x4_f2(14, c, d, a, b, ctx->k[18], x[11]);
  100. b = md5x4_f2(20, b, c, d, a, ctx->k[19], x[0]);
  101. a = md5x4_f2(5, a, b, c, d, ctx->k[20], x[5]);
  102. d = md5x4_f2(9, d, a, b, c, ctx->k[21], x[10]);
  103. c = md5x4_f2(14, c, d, a, b, ctx->k[22], x[15]);
  104. b = md5x4_f2(20, b, c, d, a, ctx->k[23], x[4]);
  105. a = md5x4_f2(5, a, b, c, d, ctx->k[24], x[9]);
  106. d = md5x4_f2(9, d, a, b, c, ctx->k[25], x[14]);
  107. c = md5x4_f2(14, c, d, a, b, ctx->k[26], x[3]);
  108. b = md5x4_f2(20, b, c, d, a, ctx->k[27], x[8]);
  109. a = md5x4_f2(5, a, b, c, d, ctx->k[28], x[13]);
  110. d = md5x4_f2(9, d, a, b, c, ctx->k[29], x[2]);
  111. c = md5x4_f2(14, c, d, a, b, ctx->k[30], x[7]);
  112. b = md5x4_f2(20, b, c, d, a, ctx->k[31], x[12]);
  113.  
  114. a = md5x4_f3(4, a, b, c, d, ctx->k[32], x[5]);
  115. d = md5x4_f3(11, d, a, b, c, ctx->k[33], x[8]);
  116. c = md5x4_f3(16, c, d, a, b, ctx->k[34], x[11]);
  117. b = md5x4_f3(23, b, c, d, a, ctx->k[35], x[14]);
  118. a = md5x4_f3(4, a, b, c, d, ctx->k[36], x[1]);
  119. d = md5x4_f3(11, d, a, b, c, ctx->k[37], x[4]);
  120. c = md5x4_f3(16, c, d, a, b, ctx->k[38], x[7]);
  121. b = md5x4_f3(23, b, c, d, a, ctx->k[39], x[10]);
  122. a = md5x4_f3(4, a, b, c, d, ctx->k[40], x[13]);
  123. d = md5x4_f3(11, d, a, b, c, ctx->k[41], x[0]);
  124. c = md5x4_f3(16, c, d, a, b, ctx->k[42], x[3]);
  125. b = md5x4_f3(23, b, c, d, a, ctx->k[43], x[6]);
  126. a = md5x4_f3(4, a, b, c, d, ctx->k[44], x[9]);
  127. d = md5x4_f3(11, d, a, b, c, ctx->k[45], x[12]);
  128. c = md5x4_f3(16, c, d, a, b, ctx->k[46], x[15]);
  129. b = md5x4_f3(23, b, c, d, a, ctx->k[47], x[2]);
  130.  
  131. a = md5x4_f4(6, a, b, c, d, ctx->k[48], x[0]);
  132. d = md5x4_f4(10, d, a, b, c, ctx->k[49], x[7]);
  133. c = md5x4_f4(15, c, d, a, b, ctx->k[50], x[14]);
  134. b = md5x4_f4(21, b, c, d, a, ctx->k[51], x[5]);
  135. a = md5x4_f4(6, a, b, c, d, ctx->k[52], x[12]);
  136. d = md5x4_f4(10, d, a, b, c, ctx->k[53], x[3]);
  137. c = md5x4_f4(15, c, d, a, b, ctx->k[54], x[10]);
  138. b = md5x4_f4(21, b, c, d, a, ctx->k[55], x[1]);
  139. a = md5x4_f4(6, a, b, c, d, ctx->k[56], x[8]);
  140. d = md5x4_f4(10, d, a, b, c, ctx->k[57], x[15]);
  141. c = md5x4_f4(15, c, d, a, b, ctx->k[58], x[6]);
  142. b = md5x4_f4(21, b, c, d, a, ctx->k[59], x[13]);
  143. a = md5x4_f4(6, a, b, c, d, ctx->k[60], x[4]);
  144. d = md5x4_f4(10, d, a, b, c, ctx->k[61], x[11]);
  145. c = md5x4_f4(15, c, d, a, b, ctx->k[62], x[2]);
  146. b = md5x4_f4(21, b, c, d, a, ctx->k[63], x[9]);
  147.  
  148. ctx->a = _mm_add_epi32(ctx->a, a);
  149. ctx->b = _mm_add_epi32(ctx->b, b);
  150. ctx->c = _mm_add_epi32(ctx->c, c);
  151. ctx->d = _mm_add_epi32(ctx->d, d);
  152. }
  153.  
  154. void md5x4_final(struct md5x4_context* ctx, uint8_t out[4][16]) {
  155. __m128 a = _mm_castsi128_ps(ctx->a);
  156. __m128 b = _mm_castsi128_ps(ctx->b);
  157. __m128 c = _mm_castsi128_ps(ctx->c);
  158. __m128 d = _mm_castsi128_ps(ctx->d);
  159.  
  160. __m128 t0 = _mm_unpacklo_ps(a, b);
  161. __m128 t1 = _mm_unpackhi_ps(a, b);
  162. __m128 t2 = _mm_unpacklo_ps(c, d);
  163. __m128 t3 = _mm_unpackhi_ps(c, d);
  164.  
  165. _mm_storeu_ps((float*)out[0], _mm_movelh_ps(t0, t2));
  166. _mm_storeu_ps((float*)out[1], _mm_movehl_ps(t2, t0));
  167. _mm_storeu_ps((float*)out[2], _mm_movelh_ps(t1, t3));
  168. _mm_storeu_ps((float*)out[3], _mm_movehl_ps(t3, t1));
  169. }
  170.  
  171.  
  172. void unsafe_pad_block(uint8_t block[64], size_t length) {
  173. block[length] = 0x80;
  174. *(uint64_t*)(block + 56) = length * 8;
  175. }
  176.  
  177. int main() {
  178. struct md5x4_context ctx;
  179. md5x4_pre_init(&ctx);
  180.  
  181. uint8_t data[4][64] = {
  182. "We are the others",
  183. "We are the cast-outs",
  184. "We're the outsiders",
  185. "But you can't hide us",
  186. };
  187.  
  188. unsafe_pad_block(data[0], 17);
  189. unsafe_pad_block(data[1], 20);
  190. unsafe_pad_block(data[2], 19);
  191. unsafe_pad_block(data[3], 21);
  192.  
  193. uint8_t digests[4][16] = {0};
  194.  
  195. md5x4_init(&ctx);
  196. md5x4_raw_update(&ctx, data);
  197. md5x4_final(&ctx, digests);
  198.  
  199. for (size_t i = 0; i < 4; ++i) {
  200. for (size_t j = 0; j < 16; ++j)
  201. printf("%02X ", digests[i][j]);
  202. printf("\n");
  203. }
  204.  
  205. return 0;
  206. }
Success #stdin #stdout 0s 5616KB
stdin
Standard input is empty
stdout
BF C3 9C B7 B2 1A 89 F2 B8 F2 AB C3 40 26 2E DE 
58 39 28 98 BD 81 5D 0D 65 92 B6 D8 E7 AB B1 31 
D5 91 DA 97 45 4A AD FE 4D 83 F8 0E C2 A8 C4 AB 
92 01 2E AD A8 04 FF 83 52 16 FC 81 C2 A4 27 81