VS2010中内联汇编入门--cdecl
在 VS2010 中进行内联汇编时要注意:在 Debug 配置下,编译器会在被调函数的序言中自动为局部变量在栈上预留一块区域,并把它全部填充为 0xCCCCCCCC。如下面的反汇编所示,该区域从 ebp-0C0h 开始,共 0C0h 字节(30h 个 dword):
int __cdecl mse_sse(unsigned char * a, unsigned char * b, int len){
00B213B0 push ebp
00B213B1 mov ebp,esp
00B213B3 sub esp,0C0h
00B213B9 push ebx
00B213BA push esi
00B213BB push edi
00B213BC lea edi,[ebp-0C0h]
00B213C2 mov ecx,30h
00B213C7 mov eax,0CCCCCCCCh
00B213CC rep stos dword ptr es:[edi]
参数传递:
在cdecl调用下,参数由上层调用函数进行入栈,在内联汇编模式下,访问参数要用中括号加参数名称的方式获取参数
mov esi,[a];//a
00B213CE mov esi,dword ptr [a]
mov edi,[b];//b
00B213D1 mov edi,dword ptr [b]
mov ecx,[len];//len
00B213D4 mov ecx,dword ptr [len]
不要使用下面的方式:
mov esi,[ebp+8];//a
mov edi,[ebp+8+4];//b
mov ecx,[ebp+8+8];//len
这种方式在 Debug 下碰巧是正确的(此时 ebp 是帧指针,[ebp+8] 正好是第一个参数),但在 Release 下编译器可能省略帧指针或改变栈帧布局,ebp 不一定再指向当前栈帧,[ebp+8] 读到的就不是参数,结果会出错甚至崩溃。
下面是一个求MSE的小函数
int __cdecl mse_sse(unsigned char * a, unsigned char * b, int len){
__asm{
/*
mov esi,[ebp+8];//a
mov edi,[ebp+8+4];//b
mov ecx,[ebp+8+8];//len
*/
mov esi,[a];//a
mov edi,[b];//b
mov ecx,[len];//len
pxor xmm7,xmm7;//128bit 0
movdqu xmm0,[esi];//a[0-7]
punpcklbw xmm0,xmm7;//8bit->16bit
movdqu xmm1,[edi];//b[0-7]
punpcklbw xmm1,xmm7;//8bit->16bit
psubw xmm0,xmm1;//a-b
pabsw xmm0,xmm0; //|a-b|
//movdqu [esi],xmm0
pmullw xmm0,xmm0;//|a-b|^2
phaddw xmm0,xmm0;//0,1->0
phaddw xmm0,xmm0;//0,1,2,3->0
phaddw xmm0,xmm0;//0,...,7->0
movd eax,xmm0;
}
}
完整的代码:
//file:main.c
#include<stdlib.h>
#include<stdio.h>
#include"mem_align.h"
#include "asm.h"
/*
 * Small record whose size main() prints via sizeof. A char followed by an
 * int normally gets padding inserted between the two members, so the
 * reported size is usually larger than sizeof(char) + sizeof(int).
 */
struct sByte {
    char c;
    int i;
};

/* File-scope instance; main() only takes its size. */
struct sByte aa;
/*
 * Demo driver: obtains an aligned scratch buffer, runs both MSE routines on
 * two identical 8-byte inputs, and prints the results together with the
 * struct sizes and the alignment of the scratch buffer.
 *
 * All declarations are kept at the top of the block so the file still
 * builds with a C89-only compiler.
 */
int main(int argc, char * argv[])
{
    int size = 30;
    int align_size = 16;
    align_mem_get(pBuffer, size, align_size);
    struct sByte bb;
    unsigned char a[32] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7};
    unsigned char b[32] = {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7};
    int len = 8;
    int mse;

    (void)argc;
    (void)argv;

    mse = mse_sse_v2(a, b, len);
    printf("mse = : %d \n", mse);
    mse = mse_sse(a, b, len);
    printf("mse = : %d \n", mse);

    /* sizeof yields size_t; convert explicitly to match the %d specifier. */
    printf("%d \n", (int)sizeof(aa));
    printf("%d \n", (int)sizeof(bb));

    /* Print the address with %p and take the remainder on an unsigned
       integer wide enough for a pointer: casting the pointer to int can
       truncate it, and a negative int would give a negative remainder. */
    printf("align_mem_get pBuffer : %p \n", (void *)pBuffer);
    printf("align_mem_get pBuffer : %p mod %d = %d \n",
           (void *)pBuffer, align_size,
           (int)((size_t)pBuffer % (size_t)align_size));

    align_mem_free(pBuffer);
    return 0;
}
/*
FILE: mem_align.h
*/
#ifndef MEM_ALIGN_H
#define MEM_ALIGN_H

#include <stdlib.h> /* malloc, free, NULL, size_t */

/*
 * align_mem_get(pVar, size, align_size)
 *
 * Declares two variables in the current scope:
 *   pVar##_mem : the raw block returned by malloc (size + align_size bytes)
 *   pVar       : the first address inside that block that is a multiple of
 *                align_size, or NULL if the allocation failed
 *
 * align_size must be a power of two. Because the macro expands to
 * declarations, use it where declarations are allowed. The raw pointer is
 * named after pVar, so several buffers can coexist in one scope.
 * Release the block with align_mem_free(pVar); never pass pVar to free().
 */
#define align_mem_get(pVar, size, align_size) \
    unsigned char *pVar##_mem = \
        malloc((size_t)(size) + (size_t)(align_size)); \
    unsigned char *pVar = (pVar##_mem == NULL) ? NULL : \
        pVar##_mem + (((size_t)(align_size) - \
                       ((size_t)pVar##_mem & ((size_t)(align_size) - 1))) & \
                      ((size_t)(align_size) - 1))

/*
 * align_mem_free(pVar)
 *
 * Frees the raw block obtained by align_mem_get(pVar, ...) and sets both
 * pointers to NULL. Wrapped in do/while(0) so it acts as one statement.
 */
#define align_mem_free(pVar) \
    do { \
        free(pVar##_mem); \
        pVar##_mem = NULL; \
        pVar = NULL; \
    } while (0)

#endif
// asm.h
#ifndef _ASM_H_
#define _ASM_H_
#include <xmmintrin.h>//SSE
#include <emmintrin.h>//SSE2
#include <pmmintrin.h>//SSE3
#include <tmmintrin.h>//SSSE3
#include <smmintrin.h>//SSE4.1
#include <nmmintrin.h>//SSE4.2
#include <immintrin.h>//AVX,AVX2,FMA
//__declspec(naked) static int mse_sse(unsigned char * a, unsigned char * b, int len);
/* Inline-assembly SSE kernel defined in asm.c. It works on the low 8 bytes
   of a and b; len is read but is not used to bound the work. The result is
   handed back through EAX. */
int __cdecl mse_sse(unsigned char * a, unsigned char * b, int len);
/* Stub defined in asm.c: performs no computation on a or b and returns len. */
int __cdecl mse_sse_v2(unsigned char * a, unsigned char * b, int len);
#endif
//file:asm.c
#include "asm.h"
//8x8 16bit unsigned short
//int __stdcall mse_sse(unsigned char * a, unsigned char * b, int len){
//__declspec(naked) static int mse_sse(unsigned char * a, unsigned char * b, int len){
int __cdecl mse_sse(unsigned char * a, unsigned char * b, int len){
__asm{
/*
mov esi,[ebp+8];//a
mov edi,[ebp+8+4];//b
mov ecx,[ebp+8+8];//len
*/
mov esi,[a];//a
mov edi,[b];//b
mov ecx,[len];//len
pxor xmm7,xmm7;//128bit 0
movdqu xmm0,[esi];//a[0-7]
punpcklbw xmm0,xmm7;//8bit->16bit
movdqu xmm1,[edi];//b[0-7]
punpcklbw xmm1,xmm7;//8bit->16bit
psubw xmm0,xmm1;//a-b
pabsw xmm0,xmm0; //|a-b|
//movdqu [esi],xmm0
pmullw xmm0,xmm0;//|a-b|^2
phaddw xmm0,xmm0;//0,1->0
phaddw xmm0,xmm0;//0,1,2,3->0
phaddw xmm0,xmm0;//0,...,7->0
movd eax,xmm0;
}
}
/*
 * Placeholder for a second MSE implementation.
 *
 * a, b : accepted for signature compatibility with mse_sse; not read.
 * len  : returned unchanged.
 *
 * No computation is performed yet; the function simply returns len.
 */
int __cdecl mse_sse_v2(unsigned char * a, unsigned char * b, int len){
    (void)a;
    (void)b;
    return len;
}
【资料】比较好的说明: http://blog.csdn.net/kzh313561014/article/details/7283765
【GCC 下的 AT&T 汇编】http://blog.csdn.net/zhangqingsup/article/details/6704253
还没有评论,来说两句吧...