// Scalar (plain C) reference computation.
// Locates the pixels at (x1, y1) and (x2, y2) in the image buffer: rows are
// widthStep bytes apart and each pixel occupies 3 consecutive bytes.
unsigned char* pColor1 = (unsigned char*)m_pImage->imageData+y1*m_pImage->widthStep+x1*3;
unsigned char* pColor2 = (unsigned char*)m_pImage->imageData+y2*m_pImage->widthStep+x2*3;
// Absolute difference of each of the three bytes of the two pixels.
float r = (float)abs(pColor1[0]-pColor2[0]);
float g = (float)abs(pColor1[1]-pColor2[1]);
float b = (float)abs(pColor1[2]-pColor2[2]);
// 10000 divided by the summed differences, so a larger difference gives a
// smaller value. The divisor is 0 when all three bytes of both pixels match.
float DiffValue = 10000.0/(r+g+b);
//SIMD Code
// Computes a weight between the pixels at (x1, y1) and (x2, y2) with SSE2:
// 10000 divided by the sum of absolute differences (SAD) of the three bytes
// that make up each pixel.
//
// Pixels are addressed as 3 bytes each, with rows m_pImage->widthStep bytes
// apart. The coordinates are not range-checked here.
//
// Returns the weight when m_mode == 0, otherwise 0. A larger difference yields
// a smaller weight (original note: such a link is easier to cut). The divisor
// is 0 when the two pixels are byte-for-byte identical.
float GraphCut::PixelDiff_SIMD(int x1,int y1,int x2,int y2)
{
    float DiffValue = 0;

    // The result is only produced in mode 0, so the pixels are only read then.
    if (m_mode == 0)
    {
        const unsigned char* pImageBytes = (const unsigned char*)m_pImage->imageData;
        const unsigned char* pColor1 = pImageBytes + y1*m_pImage->widthStep + x1*3;
        const unsigned char* pColor2 = pImageBytes + y2*m_pImage->widthStep + x2*3;

        // Read exactly the three bytes of each pixel and pack them into the
        // low 24 bits of an int. A 16-byte _mm_loadu_si128 from the pixel
        // address would touch 13 bytes beyond the pixel, which can run past
        // the end of the image buffer for pixels near its end.
        const int packed1 = pColor1[0] | (pColor1[1] << 8) | (pColor1[2] << 16);
        const int packed2 = pColor2[0] | (pColor2[1] << 8) | (pColor2[2] << 16);

        // _mm_cvtsi32_si128 zeroes every byte above the low 32 bits, so no
        // additional masking is required before the SAD.
        const __m128i mmPixel1 = _mm_cvtsi32_si128(packed1);
        const __m128i mmPixel2 = _mm_cvtsi32_si128(packed2);

        // Sum of |a[i] - b[i]| over the low 8 bytes lands in the low 64-bit lane.
        const __m128i mmSad = _mm_sad_epu8(mmPixel1, mmPixel2);

        // Extract the low 32 bits with an intrinsic; the m128i_i32 union
        // member is only available with MSVC.
        const int tmpRGB = _mm_cvtsi128_si32(mmSad);

        DiffValue = 10000.0/(float)tmpRGB;
    }

    return DiffValue;
}
// Computes, in one SIMD pass, the weights between the pixel at (x1, y1) and
// four of its neighbours, and stores them in dst when m_mode == 0:
//   dst[0] : (x1-1, y1)     left
//   dst[1] : (x1-1, y1-1)   upper-left
//   dst[2] : (x1,   y1-1)   up
//   dst[3] : (x1+1, y1-1)   upper-right
// Each weight is 10000 divided by the sum of absolute differences of the
// three bytes of the two pixels, so a larger difference gives a smaller
// weight (original note: such a link is easier to cut). The divisor is 0 for
// byte-identical pixels.
//
// dst must have room for 4 floats; it is left untouched when m_mode != 0.
// Pixels are addressed as 3 bytes each with rows m_pImage->widthStep bytes
// apart. Coordinates are not range-checked, so the caller must make sure all
// four neighbours lie inside the image. Uses _mm_maddubs_epi16 (SSSE3).
void GraphCut::PixelDiff_SIMD_4(int x1,int y1, float *dst)
{
    // The result is only stored in mode 0, so skip all work otherwise.
    if (m_mode == 0)
    {
        const unsigned char* pImageBytes = (const unsigned char*)m_pImage->imageData;
        const unsigned char* pRow        = pImageBytes + y1*m_pImage->widthStep;
        const unsigned char* pRowAbove   = pImageBytes + (y1-1)*m_pImage->widthStep;

        const unsigned char* pCenter     = pRow      + x1*3;
        const unsigned char* pLeft       = pRow      + (x1-1)*3;
        const unsigned char* pUpperLeft  = pRowAbove + (x1-1)*3;
        const unsigned char* pUp         = pRowAbove + x1*3;
        const unsigned char* pUpperRight = pRowAbove + (x1+1)*3;

        // One neighbour per 32-bit lane (3 pixel bytes + one zero pad byte).
        // _mm_set_epi8 takes the highest byte first, so lane 0 is the left
        // neighbour and lane 3 is the upper-right neighbour.
        const __m128i mmNeighbours = _mm_set_epi8(
            0x00, pUpperRight[2], pUpperRight[1], pUpperRight[0],
            0x00, pUp[2],         pUp[1],         pUp[0],
            0x00, pUpperLeft[2],  pUpperLeft[1],  pUpperLeft[0],
            0x00, pLeft[2],       pLeft[1],       pLeft[0]);

        // The centre pixel replicated into all four lanes.
        const __m128i mmCenter = _mm_set_epi8(
            0x00, pCenter[2], pCenter[1], pCenter[0],
            0x00, pCenter[2], pCenter[1], pCenter[0],
            0x00, pCenter[2], pCenter[1], pCenter[0],
            0x00, pCenter[2], pCenter[1], pCenter[0]);

        // Per-byte absolute difference: of the two saturating subtractions
        // one is |a-b| and the other is 0, so their sum is |a-b|.
        const __m128i mmAbsDiff = _mm_adds_epu8(_mm_subs_epu8(mmCenter, mmNeighbours),
                                                _mm_subs_epu8(mmNeighbours, mmCenter));

        // Multiply by 1 and add adjacent pairs: 16 unsigned bytes -> 8 signed
        // 16-bit sums (at most 510 each, so no saturation occurs).
        const __m128i mmPairSums = _mm_maddubs_epi16(mmAbsDiff, _mm_set1_epi8(1));

        // Multiply by 1 and add adjacent pairs again: 8 x 16-bit -> 4 x 32-bit,
        // giving the per-pixel sum of absolute differences in each lane.
        const __m128i mmSums = _mm_madd_epi16(mmPairSums, _mm_set1_epi16(1));

        // Convert the four integer sums to float and divide 10000 by each.
        const __m128 mmWeights = _mm_div_ps(_mm_set1_ps(10000.0f), _mm_cvtepi32_ps(mmSums));

        // Unaligned store of all four lanes; the m128_f32 union member is
        // only available with MSVC.
        _mm_storeu_ps(dst, mmWeights);
    }
}
// Comment list (blog page footer; not part of the code)