首页 >数据库 >mysql教程 >内存拷贝的优化方法(草稿) [2]

内存拷贝的优化方法(草稿) [2]

WBOY 原创: 2016-06-07 15:44:15 1204浏览

以下为引用： global _fast_memcpy9 %define param esp+12+4 %define src param+0 %define dst param+4 %define len param+8 %define CACHEBLOCK 400h _fast_memcpy9: push esi push edi push ebx mov esi, [src] ; source array mov edi, [dst] ; destination array mov ecx, [len] ; number o…

以下为引用：

;-----------------------------------------------------------------------
; void fast_memcpy9(const void *src, void *dst, unsigned len)
;
; Syntax: NASM (Intel operand order), 32-bit x86.
; ABI:    arguments on the stack, source pointer first (looks like cdecl;
;         confirm against the caller).
; In:     [src] = source pointer, [dst] = destination pointer,
;         [len] = byte count (any value, including 0).
; Out:    nothing meaningful; eax holds the tail byte count (0 when len
;         is a multiple of the block size, as before).
; Clobb:  eax, ecx, flags, mm0-mm7 (emms is issued after MMX use).
;         esi, edi, ebx are saved and restored. DF is cleared.
; Needs:  MMX plus movntq / prefetchnta / sfence support.
;
; Strategy: whole 8 KiB blocks are first pulled into cache by touching
; one dword per cache line, then streamed to the destination with
; non-temporal stores. Whatever does not fill a whole block is copied
; afterwards with rep movsd / rep movsb, so lengths that are not a
; multiple of the block size (or are zero) no longer run past the
; buffers.
;-----------------------------------------------------------------------
global _fast_memcpy9

; Three registers are pushed (12 bytes) and the return address takes 4,
; so the first argument lives at esp+16 once the prologue has run.
%define param       esp+12+4
%define src         param+0
%define dst         param+4
%define len         param+8

%define CACHEBLOCK 400h                 ; QWORDs per block (400h * 8 = 8 KiB)

_fast_memcpy9:
        push    esi
        push    edi
        push    ebx

        mov     esi, [src]              ; source array
        mov     edi, [dst]              ; destination array
        mov     ecx, [len]              ; byte count
        shr     ecx, 3                  ; -> QWORD count
        and     ecx, -CACHEBLOCK        ; keep only whole blocks
        jz      .tail                   ; less than one block: skip the fast path

        lea     esi, [esi+ecx*8]        ; end of the blocked part of the source
        lea     edi, [edi+ecx*8]        ; end of the blocked part of the destination
        neg     ecx                     ; negative offset doubles as pointer and loop counter

.mainloop:
        mov     eax, CACHEBLOCK / 16    ; .prefetchloop handles 16 QWORDs per pass
        add     ecx, CACHEBLOCK         ; move up to the end of this block

.prefetchloop:
        mov     ebx, [esi+ecx*8-64]     ; touch one address in this cache line...
        mov     ebx, [esi+ecx*8-128]    ; ... and one in the previous line
        sub     ecx, 16                 ; 16 QWORDs = 2 64-byte cache lines
        dec     eax
        jnz     .prefetchloop           ; exits with ecx back at the block start

        mov     eax, CACHEBLOCK / 8     ; .writeloop moves 8 QWORDs per pass

.writeloop:
        prefetchnta [esi+ecx*8 + 512]   ; fetch ahead by 512 bytes

        movq    mm0, qword [esi+ecx*8]
        movq    mm1, qword [esi+ecx*8+8]
        movq    mm2, qword [esi+ecx*8+16]
        movq    mm3, qword [esi+ecx*8+24]
        movq    mm4, qword [esi+ecx*8+32]
        movq    mm5, qword [esi+ecx*8+40]
        movq    mm6, qword [esi+ecx*8+48]
        movq    mm7, qword [esi+ecx*8+56]

        movntq  qword [edi+ecx*8], mm0
        movntq  qword [edi+ecx*8+8], mm1
        movntq  qword [edi+ecx*8+16], mm2
        movntq  qword [edi+ecx*8+24], mm3
        movntq  qword [edi+ecx*8+32], mm4
        movntq  qword [edi+ecx*8+40], mm5
        movntq  qword [edi+ecx*8+48], mm6
        movntq  qword [edi+ecx*8+56], mm7

        add     ecx, 8
        dec     eax
        jnz     .writeloop

        test    ecx, ecx                ; offset reaches 0 after the last whole block
        jnz     .mainloop

        sfence                          ; flush write buffer
        emms

.tail:
        ; esi/edi now point just past the blocked part (or at the buffer
        ; start when no whole block was copied). esp has not moved since
        ; the prologue, so [len] is still addressable.
        mov     ecx, [len]
        and     ecx, CACHEBLOCK*8 - 1   ; bytes not covered by whole blocks
        mov     eax, ecx
        shr     ecx, 2
        cld                             ; string ops must walk upwards
        rep movsd                       ; dword part of the tail
        mov     ecx, eax
        and     ecx, 3
        rep movsb                       ; last 0-3 bytes

        pop     ebx
        pop     edi
        pop     esi

        ret

声明：

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系admin@php.cn

上一篇：Access中使用SQL语句应掌握的几点技巧(学习) | 下一篇：C#操作Access之读取mdb浅析

查看更多

内存拷贝的优化方法(草稿) [2]

相关文章