// Scalar (plain C) version of the pixel-difference computation.
// This is a fragment: m_pImage, x1, y1, x2 and y2 come from surrounding code that is not shown here.

 // Address of each pixel: y*widthStep selects the row, x*3 the column (three bytes per pixel).
 unsigned char* pColor1 = (unsigned char*)m_pImage->imageData+y1*m_pImage->widthStep+x1*3;
 unsigned char* pColor2 = (unsigned char*)m_pImage->imageData+y2*m_pImage->widthStep+x2*3;
 // Absolute difference of each of the three bytes of the two pixels.
 // NOTE(review): r/g/b simply name bytes 0/1/2; the real channel order of the image is not visible here - confirm.
 float r = (float)abs(pColor1[0]-pColor2[0]);
 float g = (float)abs(pColor1[1]-pColor2[1]);
 float b = (float)abs(pColor1[2]-pColor2[2]);
 // 10000 divided by the summed difference. Identical pixels make the divisor zero; that case is not guarded.
 float DiffValue = 10000.0/(r+g+b);


//SIMD Code

float GraphCut::PixelDiff_SIMD(int x1,int y1,int x2,int y2)
{
 float DiffValue =0;
 unsigned char* pColor1 = (unsigned char*)m_pImage->imageData+y1*m_pImage->widthStep+x1*3;
 unsigned char* pColor2 = (unsigned char*)m_pImage->imageData+y2*m_pImage->widthStep+x2*3;

 __m128i mmSrc1, mmSrc2;
 __m128i mmDst1, mmDst2;
 __m128i mmMask, mmResult;

 mmSrc1 = _mm_loadu_si128 ( (__m128i *) pColor1);
 mmSrc2 = _mm_loadu_si128 ( (__m128i *) pColor2);
 mmMask = _mm_set_epi8 (0x00, 0x00, 0x00, 0x00,0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff);
 
 //__m128i _mm_and_si128(__m128i a, __m128i b)
 mmDst1 =  _mm_and_si128(mmSrc1, mmMask);
 mmDst2 =  _mm_and_si128(mmSrc2, mmMask);

 //__m128i _mm_sad_epu8(__m128i a, __m128i b)
 mmResult = _mm_sad_epu8(mmDst1, mmDst2);

 int tmpRGB = mmResult.m128i_i32[0];//_mm_extract_epi8( mmResult, 0);

 if(m_mode ==0) DiffValue = 10000.0/(float)tmpRGB;

 //if(tmpRGB>0) printf("SIMD tmpRGB is %d \n",tmpRGB);
 return DiffValue; //值越大, weight越小,越容易切斷
}

 

void GraphCut::PixelDiff_SIMD_4(int x1,int y1, float *dst)
{

 //m_pNLink[index] = PixelDiff(x,y,x-1,y);  //左
 //m_pNLink[index+1] = PixelDiff(x,y,x-1,y-1); //左上
 //m_pNLink[index+2] = PixelDiff(x,y,x,y-1);  //上
 //m_pNLink[index+3] = PixelDiff(x,y,x+1,y-1); //右上

 int x2 = x1-1;
 int x3 = x1-1;
 int x4 = x1;
 int x5 = x1+1;

 int y2 = y1;
 int y3 = y1-1;
 int y4 = y1-1;
 int y5 = y1-1;

 unsigned char* pColor1 = (unsigned char*)m_pImage->imageData+y1*m_pImage->widthStep+x1*3;
 unsigned char* pColor2 = (unsigned char*)m_pImage->imageData+y2*m_pImage->widthStep+x2*3;
 unsigned char* pColor3 = (unsigned char*)m_pImage->imageData+y3*m_pImage->widthStep+x3*3;
 unsigned char* pColor4 = (unsigned char*)m_pImage->imageData+y4*m_pImage->widthStep+x4*3;
 unsigned char* pColor5 = (unsigned char*)m_pImage->imageData+y5*m_pImage->widthStep+x5*3;

 __m128i mmSrc1, mmSrc2;
 __m128i mmResult1, mmResult2, mmResult3, mmResult4, mmResult5;
 __m128i mmMask1, mmMask2, mmMask3, mmMask4;
 __m128 mmResult6, mmResult7, mmResult8;
 
 mmSrc2 = _mm_set_epi8 ( 0x00, pColor5[2], pColor5[1], pColor5[0], 0x00, pColor4[2], pColor4[1], pColor4[0], 0x00, pColor3[2], pColor3[1], pColor3[0], 0x00, pColor2[2], pColor2[1], pColor2[0]);
 mmSrc1 = _mm_set_epi8 ( 0x00, pColor1[2], pColor1[1], pColor1[0], 0x00, pColor1[2], pColor1[1], pColor1[0], 0x00, pColor1[2], pColor1[1], pColor1[0], 0x00, pColor1[2], pColor1[1], pColor1[0]);

 //取絕對值
 mmResult1 = _mm_subs_epu8(mmSrc1, mmSrc2);
 mmResult2 = _mm_subs_epu8(mmSrc2, mmSrc1);
 mmResult3 = _mm_adds_epu8(mmResult1, mmResult2);

 mmMask1 = _mm_set_epi8 ( 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);
 mmMask2 = _mm_set_epi16( 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);

 //兩兩相乘
 mmResult4 = _mm_maddubs_epi16 (mmResult3, mmMask1); //由16個unsigned char變成8個signed integer
 mmResult5 = _mm_madd_epi16 (mmResult4, mmMask2); //由8個signed integer變成4個signed integer

 //4個signed integer轉成4個float
 mmResult6 = _mm_cvtepi32_ps(mmResult5);

 //設成4個相同的浮點數
 mmResult7 = _mm_set1_ps( 10000.0 );

 //__m128 _mm_div_ps(__m128 a, __m128 b)
 mmResult8 = _mm_div_ps(mmResult7, mmResult6);

 //值越大, weight越小,越容易切斷
 if(m_mode ==0)
 {
  dst[0] = mmResult8.m128_f32[0];
  dst[1] = mmResult8.m128_f32[1];
  dst[2] = mmResult8.m128_f32[2];
  dst[3] = mmResult8.m128_f32[3];
 }
}

arrow
arrow
    全站熱搜

    chunyuan 發表在 痞客邦 留言(0) 人氣()