fork(6) download
  1. global _start
  2.  
  3.  
  4.  
  section .data

  ;; Start-of-run time-stamp counter value, split into halves (written by benchmark).
  timelo:   dd      0
  timehi:   dd      0

  ;; Output buffer for writehex: 8 hex digits followed by a newline.
  b2hout:   times 9 db 0

  ;; Nibble -> lowercase ASCII hex digit lookup table.
  b2hlut:   db      '0123456789abcdef'

  ;; The string buffers are uninitialised storage, so they live in .bss
  ;; (resb inside .data makes NASM warn and emit zero bytes into the image).
  section .bss

            alignb  8
  ;; Each buffer holds a string of up to 255 bytes plus its terminator.
  ;; The one-byte pads shift each following buffer one byte further off the
  ;; 8-byte boundary established above.
  strbuf1:  resb    256
  pad1:     resb    1           ;; strbuf2 is misaligned by 1
  strbuf2:  resb    256
  pad2:     resb    1           ;; strbuf3 is misaligned by 2
  strbuf3:  resb    256
  pad3:     resb    1           ;; strbuf4 is misaligned by 3
  strbuf4:  resb    256
  ;; strlen_3 / strlen_4 load 8 bytes per step and can touch up to 7 bytes
  ;; beyond a terminator stored in the last byte of strbuf4; keep those reads
  ;; inside storage this program owns.
  strguard: resb    8
  21.  
  22. section .text
  23.  
  24.  
  ;; Number of timed loop iterations requested for every benchmark run.
  BENCH_ITERATIONS equ 100000

  ;; RUN_BENCH func, length
  ;; Pushes (iterations, length, func) and calls benchmark, which pops all
  ;; three arguments itself.
  %macro RUN_BENCH 2
          push    BENCH_ITERATIONS
          push    %2
          push    %1
          call    benchmark
  %endmacro

  ;; Program entry point: time strlen_3 and strlen_4 on 255- and 251-byte
  ;; strings, then terminate the process.
  _start:

          RUN_BENCH strlen_3, 255
          RUN_BENCH strlen_3, 251

          RUN_BENCH strlen_4, 255
          RUN_BENCH strlen_4, 251

          call    exit
  46.  
  47.  
  48.  
  ;;-----------------------------------------------------------------------
  ;; benchmark -- time a strlen routine and print the elapsed TSC ticks
  ;; In:    [esp + 4]  = routine to time; it is called with one pointer
  ;;                     argument and must pop that argument itself
  ;;        [esp + 8]  = string length handed to makestring
  ;;        [esp + 12] = loop iterations (0 would wrap to 2^32 passes)
  ;; Out:   low 32 bits of the TSC delta written through writehex
  ;; Stack: callee pops its three arguments (ret 12)
  ;; Saves: ebx, esi
  ;;-----------------------------------------------------------------------
  benchmark:
          push    esi
          push    ebx
          ;; two saved registers + return address precede the arguments
          mov     esi, [esp + 12]         ;; esi = routine under test
          mov     ebx, [esp + 20]         ;; ebx = iterations remaining
          push    dword [esp + 16]        ;; string length
          call    makestring
          rdtsc
          mov     [timelo], eax
          mov     [timehi], edx
  .iteration:
          ;; one pass = the routine applied to each of the four buffers;
          ;; each call consumes the pointer on top of the stack
          push    strbuf1
          push    strbuf2
          push    strbuf3
          push    strbuf4
          call    esi                     ;; strbuf4
          call    esi                     ;; strbuf3
          call    esi                     ;; strbuf2
          call    esi                     ;; strbuf1
          dec     ebx
          jnz     .iteration
          rdtsc
          sub     eax, [timelo]
          sbb     edx, [timehi]           ;; high half is computed but not printed
          push    eax
          call    writehex
          pop     ebx
          pop     esi
          ret     12
  81.  
  82. align 16
  83. strlen_1:
  84. push edi
  85. mov ecx, -1
  86. xor edx, edx
  87. mov edi, [esp + 8] ;; str ptr
  88. cld
  89. xor eax, eax
  90. sub edx, edi
  91. repne scasb
  92. lea eax, [edi + edx - 1]
  93. pop edi
  94. ret 4
  95.  
  96.  
  97. align 16
  98. strlen_2:
  99. mov eax, [esp + 4]
  100. dec eax
  101. jmp .loop
  102. align 16
  103. .loop:
  104. add eax, 1
  105. test byte [eax], 0ffh
  106. jnz .loop
  107. sub eax, [esp + 4]
  108. ret 4
  109.  
  110.  
  111. align 16
  112. strlen_3:
  113. push ebx
  114. push esi
  115. push edi
  116. mov eax, [esp + 16]
  117. mov ebx, -01010101h
  118. .aligning:
  119. test eax, 3
  120. jz .scan
  121. mov dl, [eax]
  122. test dl, dl
  123. jz .found
  124. inc eax
  125. jmp .aligning
  126. align 32
  127. .scan:
  128. mov esi, [eax]
  129. mov edi, [eax + 4]
  130. lea eax, [eax + 8]
  131. lea ecx, [esi + ebx] ;!
  132. lea edx, [edi + ebx]
  133. not esi
  134. not edi
  135. and ecx, esi
  136. and edx, edi
  137. and ecx, $80808080
  138. and edx, $80808080
  139. test ecx, ecx ;!!
  140. jnz .sub8
  141. test edx, edx
  142. jz .scan
  143. lea eax, [eax-4]
  144. mov ecx, edx
  145. jmp .bytesearch
  146. .sub8:
  147. lea eax,[eax-8]
  148. .bytesearch:
  149. test cl, cl
  150. jnz .found
  151. inc eax
  152. test ch, ch
  153. jnz .found
  154. shr ecx, 16
  155. inc eax
  156. test cl, cl
  157. jnz .found
  158. inc eax
  159. .found:
  160. sub eax, [esp + 16]
  161. pop edi
  162. pop esi
  163. pop ebx
  164. ret 4
  165.  
  166.  
  ;;-----------------------------------------------------------------------
  ;; strlen_4 -- string length, 8 bytes per iteration, BSF tail search
  ;; In:    [esp + 4] = pointer to NUL-terminated string
  ;; Out:   eax = length in bytes, terminator not counted
  ;; Stack: callee pops its argument (ret 4)
  ;; Saves: ebx, esi, edi      Clobbers: ecx, edx, flags
  ;;
  ;; Zero-byte test used on each dword x:
  ;;     (x - 01010101h) & ~x & 80808080h
  ;; is non-zero exactly when x contains a zero byte, and its lowest set
  ;; bit is bit 7 of the first zero byte, so (bsf >> 3) is that byte's index.
  ;;
  ;; Every load is naturally aligned and the two-dword scan only runs from
  ;; an 8-byte boundary, so no load can straddle a page: the routine never
  ;; touches a page beyond the one holding the terminator.
  ;;-----------------------------------------------------------------------
  strlen_4:
          push    ebx
          push    esi
          push    edi
          mov     eax, [esp + 16]         ;; 3 saved regs + return address precede the arg
          mov     ebx, -01010101h         ;; adding this subtracts 1 from every byte
          mov     ecx, eax
          and     ecx, 3                  ;; ecx = byte offset inside the dword
          jz      .align8
          ;; Fetch the aligned dword that contains the first byte and shift
          ;; the bytes in front of the string out of the way.
          mov     edx, eax
          sub     edx, ecx                ;; edx = pointer rounded down to a dword
          mov     edx, [edx]
          shl     ecx, 3                  ;; byte offset -> bit count
          shr     edx, cl                 ;; dl = first string byte
          ;; Bytes shifted in at the top are zero but never examined: the
          ;; alignment test below leaves before reaching them.
          test    dl, dl
          jz      .found
          inc     eax
          test    eax, 3
          jz      .align8
          test    dh, dh
          jz      .found
          inc     eax
          shr     edx, 16
          test    eax, 3
          jz      .align8
          test    dl, dl
          jz      .found
          inc     eax                     ;; at most 3 bytes precede a dword boundary
  .align8:
          ;; eax is dword aligned; consume one dword if that is what it
          ;; takes to reach an 8-byte boundary
          test    eax, 4
          jz      .scan
          mov     ecx, [eax]
          add     eax, 4
          lea     edx, [ecx + ebx]
          not     ecx
          and     edx, ecx
          and     edx, 80808080h
          jnz     .foundhi                ;; eax is 4 past the dword, as .foundhi expects
          jmp     .scan                   ;; hop over the alignment padding
          align   16
  .scan:
          mov     esi, [eax]              ;; low dword of the group
          mov     edi, [eax + 4]          ;; high dword of the group
          add     eax, 8                  ;; eax now points past the group
          lea     ecx, [esi + ebx]        ;; x - 01010101h
          lea     edx, [edi + ebx]
          not     esi
          not     edi
          and     ecx, esi                ;; & ~x
          and     edx, edi
          and     ecx, 80808080h
          jnz     .foundlo
          and     edx, 80808080h
          jz      .scan
  .foundhi:
          ;; terminator is in the dword that ends at eax
          bsf     edx, edx
          sub     eax, [esp + 16]
          shr     edx, 3                  ;; bit index -> byte index
          lea     eax, [eax + edx - 4]
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  .foundlo:
          ;; terminator is in the dword that ends 4 bytes before eax
          bsf     ecx, ecx
          sub     eax, [esp + 16]
          shr     ecx, 3                  ;; bit index -> byte index
          lea     eax, [eax + ecx - 8]
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  .found:
          ;; eax points at the terminator itself
          sub     eax, [esp + 16]
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  230.  
  ;;-----------------------------------------------------------------------
  ;; makestring -- fill all four string buffers with strings of one length
  ;; In:    [esp + 4] = string length in bytes; each buffer is 256 bytes,
  ;;                    so the length must not exceed 255
  ;; Stack: callee pops its argument (ret 4)
  ;; Clobbers: whatever fillstring clobbers
  ;;-----------------------------------------------------------------------
  makestring:
          ;; fillstring pops both of its arguments, so after every call the
          ;; length is back at [esp + 4] and can be pushed again.
          push    dword [esp + 4]         ;; length
          push    strbuf4
          call    fillstring
          push    dword [esp + 4]
          push    strbuf3
          call    fillstring
          push    dword [esp + 4]
          push    strbuf2
          call    fillstring
          push    dword [esp + 4]
          push    strbuf1
          call    fillstring
          ret     4
  246.  
  ;;-----------------------------------------------------------------------
  ;; fillstring -- write a NUL-terminated string of non-zero bytes
  ;; In:    [esp + 4] = destination buffer (needs length + 1 bytes)
  ;;        [esp + 8] = string length in bytes
  ;; Out:   buffer[length] = 0; the bytes below it, walking from the end
  ;;        towards the start, are 1, 2, ..., 255, 1, 2, ...
  ;; Stack: callee pops both arguments (ret 8)
  ;; Clobbers: eax (al), ecx, edx, flags
  ;;-----------------------------------------------------------------------
  fillstring:
          mov     edx, [esp + 4]          ;; edx = buffer
          mov     ecx, [esp + 8]          ;; ecx = index, starts at the terminator slot
          mov     al, 1                   ;; first fill value
          mov     byte [edx + ecx], 0     ;; terminator
          jmp     .step
  .store:
          mov     [edx + ecx], al
          inc     al
          jnz     .step
          inc     al                      ;; wrapped to 0: skip it, strings hold no NUL
  .step:
          sub     ecx, 1
          jns     .store                  ;; stop once the index drops below zero
          ret     8
  263.  
  ;;-----------------------------------------------------------------------
  ;; writehex -- print a 32-bit value as 8 lowercase hex digits + newline
  ;; In:    [esp + 4] = value to print
  ;; Out:   9 bytes written to file descriptor 1 through int 80h (eax = 4);
  ;;        the result of that call is not examined
  ;; Stack: callee pops its argument (ret 4)
  ;; Saves: ebx, esi, edi      Clobbers: eax, ecx, edx, flags
  ;;-----------------------------------------------------------------------
  writehex:
          push    ebx
          push    esi
          push    edi
          mov     eax, [esp + 16]         ;; 3 saved regs + return address precede the arg
          mov     edi, b2hout             ;; edi = output cursor
          mov     esi, 8                  ;; digits still to emit
  .digit:
          rol     eax, 4                  ;; bring the next most-significant nibble to bits 0..3
          mov     edx, eax
          and     edx, 0fh
          mov     dl, [b2hlut + edx]      ;; nibble -> ASCII
          mov     [edi], dl
          inc     edi
          dec     esi
          jnz     .digit
          mov     byte [edi], 10          ;; trailing newline
          ;; write(1, b2hout, 9)
          mov     edx, 9
          mov     ecx, b2hout
          mov     ebx, 1
          mov     eax, 4
          int     80h
          pop     edi
          pop     esi
          pop     ebx
          ret     4
  292.  
  ;; exit -- end the process with status 0 through int 80h (eax = 1).
  ;; Control is not expected to come back from the interrupt.
  exit:

          xor     ebx, ebx                ;; exit status 0
          xor     eax, eax
          inc     eax                     ;; eax = 1
          int     80h
Success #stdin #stdout 0.14s 100KB
stdin
Standard input is empty
stdout
04f7adeb
04e9b5de
04574912
04554338