fork(2) download
  1. global _start
  2.  
  3.  
  4.  
  ;; ---------------------------------------------------------------------
  ;; Static storage.
  ;;   timelo/timehi : TSC snapshot taken before the timed loop
  ;;   b2hout        : 8 hex digits + trailing newline produced by writehex
  ;;   b2hlut        : nibble -> lowercase hex digit lookup table
  ;;   strbuf1..4    : 256-byte string buffers; the 1-byte pads shift each
  ;;                   following buffer one more byte off the 8-byte grid
  ;; ---------------------------------------------------------------------
  section .data

  timelo: dd      0
  timehi: dd      0
  b2hout: times 9 db 0

  b2hlut: db      '0123456789abcdef'

  ;; Uninitialised buffers belong in .bss: resb inside .data makes NASM
  ;; warn and emit explicit zero bytes into the output file.
  section .bss

          alignb  8
  strbuf1: resb   256
  pad1:    resb   1               ;; strbuf2 is misaligned by 1
  strbuf2: resb   256
  pad2:    resb   1               ;; strbuf3 is misaligned by 2
  strbuf3: resb   256
  pad3:    resb   1               ;; strbuf4 is misaligned by 3
  strbuf4: resb   256
  21.  
  22. section .text
  23.  
  24.  
  ;; ---------------------------------------------------------------------
  ;; Program entry: time strlen_3 and strlen_4, each with string lengths
  ;; 255 and 251, then terminate through exit.
  ;; ---------------------------------------------------------------------
  %define BENCH_ITERATIONS 100000

  ;; RUN_BENCHMARK function, length
  ;; Pushes (iterations, length, function) and calls benchmark, which
  ;; removes the three arguments itself.
  %macro RUN_BENCHMARK 2
          push    BENCH_ITERATIONS
          push    %2
          push    %1
          call    benchmark
  %endmacro

  _start:
          RUN_BENCHMARK strlen_3, 255
          RUN_BENCHMARK strlen_3, 251

          RUN_BENCHMARK strlen_4, 255
          RUN_BENCHMARK strlen_4, 251

          call    exit
  46.  
  47.  
  48.  
  ;; ---------------------------------------------------------------------
  ;; benchmark(function, length, iterations)
  ;; Args on the stack (function pushed last); callee removes 12 bytes.
  ;;   function   : routine taking one string pointer on the stack and
  ;;                popping it on return (ret 4); must preserve ebx, esi
  ;;   length     : length of the test strings built by makestring
  ;;   iterations : number of passes; each pass calls function once on
  ;;                each of strbuf4..strbuf1
  ;; Prints the low 32 bits of the elapsed TSC count as hex via writehex.
  ;; Preserves ebx, esi.
  ;; ---------------------------------------------------------------------
  benchmark:
          push    ebx
          push    esi
          mov     ebx, [esp + 20]         ;; iterations
          mov     ecx, [esp + 16]         ;; string length
          mov     esi, [esp + 12]         ;; function under test
          push    ecx
          call    makestring
          rdtsc
          mov     dword [timelo], eax
          mov     dword [timehi], edx
          test    ebx, ebx                ;; zero iterations: skip the loop, otherwise
          jz      .report                 ;; sub/jnz would wrap and spin 2^32 times
  .loop:
          push    strbuf1
          push    strbuf2
          push    strbuf3
          push    strbuf4
          call    esi                     ;; each call consumes one pushed pointer
          call    esi
          call    esi
          call    esi
          sub     ebx, 1
          jnz     .loop
  .report:
          rdtsc
          sub     eax, dword [timelo]
          sbb     edx, dword [timehi]     ;; edx:eax = elapsed ticks
          push    eax                     ;; only the low dword is reported
          call    writehex
          pop     esi
          pop     ebx
          ret     12
  81.  
  ;; ---------------------------------------------------------------------
  ;; strlen_1(str) -- length of a NUL-terminated string using repne scasb.
  ;; In:   [esp + 4] = str          Out: eax = length
  ;; Callee removes its argument (ret 4). Preserves edi.
  ;; ---------------------------------------------------------------------
          align   16
  strlen_1:
          push    edi
          mov     edi, [esp + 8]          ;; str
          xor     eax, eax                ;; al = 0, the byte searched for
          mov     ecx, -1                 ;; no upper bound on the scan
          cld
          repne   scasb                   ;; ecx drops once per byte, NUL included
          not     ecx                     ;; ecx = length + 1
          lea     eax, [ecx - 1]
          pop     edi
          ret     4
  95.  
  96.  
  ;; ---------------------------------------------------------------------
  ;; strlen_2(str) -- length of a NUL-terminated string, one byte per step.
  ;; In:   [esp + 4] = str          Out: eax = length
  ;; Callee removes its argument (ret 4). Only eax and flags are modified.
  ;; ---------------------------------------------------------------------
          align   16
  strlen_2:
          mov     eax, [esp + 4]          ;; cursor = str
          jmp     .check
          align   16
  .advance:
          inc     eax
  .check:
          cmp     byte [eax], 0
          jne     .advance
          sub     eax, [esp + 4]          ;; cursor - str
          ret     4
  109.  
  110.  
  ;; ---------------------------------------------------------------------
  ;; strlen_3(str) -- length of a NUL-terminated string, scanning two
  ;; dwords per step with the loop body unrolled twice.
  ;; In:   [esp + 4] = str          Out: eax = length
  ;; Callee removes its argument (ret 4). Preserves ebx, esi, edi;
  ;; modifies ecx, edx, flags.
  ;;
  ;; Zero-byte test for a dword x:
  ;;     (x - 01010101h) & ~x & 80808080h
  ;; is non-zero exactly when x contains a zero byte, and its lowest set
  ;; bit is bit 7 of the first zero byte, so bsf / 8 gives the byte index.
  ;;
  ;; The leading bytes are examined one at a time until the cursor is
  ;; 8-byte aligned. Every 8-byte load in the main loop therefore stays
  ;; inside one aligned 8-byte unit and cannot run past the terminator
  ;; into a following page.
  ;; ---------------------------------------------------------------------
          align   16
  strlen_3:
          push    ebx
          push    esi
          push    edi
          mov     eax, [esp + 16]         ;; eax = cursor, starts at str
          mov     ebx, -01010101h         ;; adding ebx subtracts 1 from each byte lane
  .head:
          test    eax, 7
          jz      .scan                   ;; cursor is 8-byte aligned
          cmp     byte [eax], 0
          je      .found
          inc     eax
          jmp     .head

          align   16
  .scan:
          mov     esi, [eax]              ;; low dword
          mov     edi, [eax + 4]          ;; high dword
          add     eax, 8                  ;; cursor now points past both dwords
          lea     ecx, [esi + ebx]
          lea     edx, [edi + ebx]
          not     esi
          not     edi
          and     ecx, esi
          and     edx, edi
          and     ecx, 80808080h
          jnz     .foundlo
          and     edx, 80808080h
          jnz     .foundhi

          mov     esi, [eax]
          mov     edi, [eax + 4]
          add     eax, 8
          lea     ecx, [esi + ebx]
          lea     edx, [edi + ebx]
          not     esi
          not     edi
          and     ecx, esi
          and     edx, edi
          and     ecx, 80808080h
          jnz     .foundlo
          and     edx, 80808080h
          jz      .scan
  .foundhi:                               ;; terminator is in the high dword
          bsf     edx, edx
          sub     eax, [esp + 16]
          shr     edx, 3                  ;; bit index -> byte index 0..3
          lea     eax, [eax + edx - 4]    ;; undo the part of the +8 beyond this dword
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  .foundlo:                               ;; terminator is in the low dword
          bsf     ecx, ecx
          sub     eax, [esp + 16]
          shr     ecx, 3
          lea     eax, [eax + ecx - 8]
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  .found:                                 ;; cursor points at the terminator
          sub     eax, [esp + 16]
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  188.  
  189.  
  ;; ---------------------------------------------------------------------
  ;; strlen_4(str) -- length of a NUL-terminated string, scanning two
  ;; dwords per step with the loop body unrolled four times.
  ;; In:   [esp + 4] = str          Out: eax = length
  ;; Callee removes its argument (ret 4). Preserves ebx, esi, edi;
  ;; modifies ecx, edx, flags.
  ;;
  ;; Zero-byte test for a dword x:
  ;;     (x - 01010101h) & ~x & 80808080h
  ;; is non-zero exactly when x contains a zero byte, and its lowest set
  ;; bit is bit 7 of the first zero byte, so bsf / 8 gives the byte index.
  ;;
  ;; The leading bytes are examined one at a time until the cursor is
  ;; 8-byte aligned. Every 8-byte load in the main loop therefore stays
  ;; inside one aligned 8-byte unit and cannot run past the terminator
  ;; into a following page.
  ;; ---------------------------------------------------------------------
          align   16
  strlen_4:
          push    ebx
          push    esi
          push    edi
          mov     eax, [esp + 16]         ;; eax = cursor, starts at str
          mov     ebx, -01010101h         ;; adding ebx subtracts 1 from each byte lane
  .head:
          test    eax, 7
          jz      .scan                   ;; cursor is 8-byte aligned
          cmp     byte [eax], 0
          je      .found
          inc     eax
          jmp     .head
  .found:                                 ;; cursor points at the terminator
          sub     eax, [esp + 16]
          pop     edi
          pop     esi
          pop     ebx
          ret     4

          align   16
  .scan:
  %rep 3                                  ;; first three unrolled steps
          mov     esi, [eax]              ;; low dword
          mov     edi, [eax + 4]          ;; high dword
          add     eax, 8                  ;; cursor now points past both dwords
          lea     ecx, [esi + ebx]
          lea     edx, [edi + ebx]
          not     esi
          not     edi
          and     ecx, esi
          and     edx, edi
          and     ecx, 80808080h
          jnz     .foundlo
          and     edx, 80808080h
          jnz     .foundhi
  %endrep
          ;; fourth step: loops back instead of branching out
          mov     esi, [eax]
          mov     edi, [eax + 4]
          add     eax, 8
          lea     ecx, [esi + ebx]
          lea     edx, [edi + ebx]
          not     esi
          not     edi
          and     ecx, esi
          and     edx, edi
          and     ecx, 80808080h
          jnz     .foundlo
          and     edx, 80808080h
          jz      .scan
  .foundhi:                               ;; terminator is in the high dword
          bsf     edx, edx
          sub     eax, [esp + 16]
          shr     edx, 3                  ;; bit index -> byte index 0..3
          lea     eax, [eax + edx - 4]    ;; undo the part of the +8 beyond this dword
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  .foundlo:                               ;; terminator is in the low dword
          bsf     ecx, ecx
          sub     eax, [esp + 16]
          shr     ecx, 3
          lea     eax, [eax + ecx - 8]
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  292.  
  293.  
  ;; ---------------------------------------------------------------------
  ;; makestring(size) -- fill strbuf4, strbuf3, strbuf2 and strbuf1 (in
  ;; that order) with a test string of the given size via fillstring.
  ;; In:   [esp + 4] = size
  ;; Callee removes its argument (ret 4).
  ;;
  ;; fillstring pops both of its arguments, so esp is back at its entry
  ;; value after every call and [esp + 4] is always the size argument.
  ;; "push dword [esp + 4]" forms its address before esp is decremented.
  ;; ---------------------------------------------------------------------
  makestring:
          push    dword [esp + 4]         ;; size
          push    strbuf4
          call    fillstring

          push    dword [esp + 4]
          push    strbuf3
          call    fillstring

          push    dword [esp + 4]
          push    strbuf2
          call    fillstring

          push    dword [esp + 4]
          push    strbuf1
          call    fillstring

          ret     4
  309.  
  ;; ---------------------------------------------------------------------
  ;; fillstring(ptr, size) -- write a NUL-terminated string of size
  ;; non-zero bytes at ptr.
  ;; In:   [esp + 4] = ptr, [esp + 8] = size
  ;; Callee removes both arguments (ret 8). Modifies eax, ecx, edx.
  ;;
  ;; Bytes are stored from the last position back to the first with the
  ;; values 1, 2, ..., 255, 1, ...; the counter skips 0 when it wraps so
  ;; the only NUL is the terminator at ptr[size].
  ;; ---------------------------------------------------------------------
  fillstring:
          mov     edx, [esp + 4]          ;; ptr
          mov     ecx, [esp + 8]          ;; size, then index of the next byte
          mov     byte [edx + ecx], 0     ;; terminator
          mov     al, 1
          jmp     .next
  .store:
          mov     byte [edx + ecx], al
          inc     al
          jnz     .next
          mov     al, 1                   ;; wrapped past 255: restart at 1
  .next:
          dec     ecx
          jns     .store                  ;; stop once the index goes negative
          ret     8
  326.  
  ;; ---------------------------------------------------------------------
  ;; writehex(value) -- format a 32-bit value as 8 lowercase hex digits
  ;; followed by a newline in b2hout, then issue int 80h with eax = 4,
  ;; ebx = 1, ecx = b2hout, edx = 9 (write of 9 bytes to descriptor 1).
  ;; In:   [esp + 4] = value
  ;; Callee removes its argument (ret 4). Preserves ebx, esi, edi.
  ;; eax is left as returned by the system call.
  ;; ---------------------------------------------------------------------
  writehex:
          push    ebx
          push    esi
          push    edi
          mov     esi, [esp + 16]         ;; value to print
          mov     edi, b2hout             ;; output cursor
          mov     ecx, 28                 ;; shift count: top nibble first
  .digit:
          mov     eax, esi
          shr     eax, cl
          and     eax, 0fh                ;; current nibble
          mov     al, byte [b2hlut + eax]
          mov     byte [edi], al
          inc     edi
          sub     ecx, 4
          jns     .digit                  ;; after shift 0 the count goes negative
          mov     byte [edi], 10          ;; newline

          mov     edx, 9                  ;; byte count
          mov     ecx, b2hout
          mov     ebx, 1
          mov     eax, 4
          int     80h
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  355.  
  ;; ---------------------------------------------------------------------
  ;; exit -- issue int 80h with eax = 1 and ebx = 0 (terminate the process
  ;; with status 0).
  ;; ---------------------------------------------------------------------
  exit:
          xor     ebx, ebx                ;; status 0
          mov     eax, 1
          int     80h
Success #stdin #stdout 0.12s 100KB
stdin
Standard input is empty
stdout
044dc4f8
0461fb72
03c531b9
04551969