共10条
1/1 1 跳转至页
大家讨论一下6000DSP程序的具体优化吧
大家看看下面的一个函数,请大家估计一下或者用汇编实现一下看看需要多少周期,欢迎讨论
/*
 * Zsad8h - sum of absolute differences (SAD) over an 8x8 block, comparing
 * the current block against the reference block after horizontal
 * two-tap averaging of the reference.
 *
 * For every row, each of the 8 reference samples is replaced by
 *     (ref[i] + ref[i + 1] + (1 - round)) / 2
 * narrowed to 8 bits, so 9 reference bytes are read per row.
 *
 * cur       : top-left sample of the 8x8 block in the current frame
 * ref       : top-left sample of the block in the reference frame; bytes
 *             ref[0..8] of each of the 8 rows must be readable
 * CurStride : distance in bytes between consecutive rows of cur
 * RefStride : distance in bytes between consecutive rows of ref
 * round     : rounding control for the interpolation; 0 rounds the
 *             average up, 1 truncates it
 *
 * Returns the accumulated SAD over the 64 samples.
 *
 * The absolute value is computed inline rather than with the TI `_abs`
 * compiler intrinsic so the reference implementation builds with any C
 * compiler; the difference is always in -255..255, so both forms agree.
 */
uint32_t Zsad8h(const uint8_t * const cur,
                const uint8_t * const ref,
                const uint32_t CurStride,
                const uint32_t RefStride,
                const uint32_t round)
{
    const uint8_t *ptr_cur = cur;
    const uint8_t *ptr_ref = ref;
    const int32_t qround = (int32_t)(1 - round);
    uint32_t sad = 0;
    uint32_t i, j;

    for (j = 0; j < 8; j++) {
        /* Left tap of the first pair; afterwards it is carried over from
         * the previous column so each reference byte is read only once. */
        uint8_t tmpleft = ptr_ref[0];

        for (i = 0; i < 8; i++) {
            const uint8_t tmpright = ptr_ref[i + 1];
            /* Narrowing to 8 bits is kept on purpose: it matches the
             * original uint8_t temporary. */
            const uint8_t tmpmid =
                (uint8_t)((tmpleft + tmpright + qround) / 2);
            const int32_t diff = (int32_t)ptr_cur[i] - (int32_t)tmpmid;

            sad += (uint32_t)(diff < 0 ? -diff : diff);
            tmpleft = tmpright;
        }
        ptr_cur += CurStride;
        ptr_ref += RefStride;
    }
    return sad;
}
关键词: 大家 讨论 一下 6000DSP 程序 具体 优化
rookie,你真热情,我相信你是个高手吧 :-) 一眼就看出这个函数的功能了,你做过MPEG4的编码吧,我现在正在做,有机会向你请教问题哦。
const uint32_t CurStride :是当前帧数据块的行宽
const uint32_t RefStride :是参考帧数据块的行宽
const uint32_t round :是水平插值时用来提高精度的舍入补偿量
不知道你能明白我的意思吗?
|------------CurStride-------------|
------------|
| |
| |----8----| |
| ———— |
| | | |
| |_____| |
|______________________|
; Zsad8h_loop - hand-scheduled loop kernel for the Zsad8h SAD routine
; (TMS320C6000 assembly).
;
; Reading the listing:
;   * a line starting with "||" issues in parallel with the line above it;
;   * the trailing ";NN" tag is the same for every instruction of one
;     execute packet; the kernel consists of 11 packets tagged 11..21.
;
; Work issued per pass through the kernel:
;   * 9 unsigned byte loads from the reference row (refD1..refD9) and
;     8 from the current row (curD1..curD8), split over an A-side and a
;     B-side pointer for each source;
;   * for each pixel k = 1..8:
;         refD<k><k+1>   = refD<k> + refD<k+1>
;         refD<k><k+1>r  = refD<k><k+1> + A_qround
;         refMid<k>      = refD<k><k+1>r >> 1
;         diffD<k>       = curD<k> - refMid<k>
;         SdiffD<k>      = abs(diffD<k>)
;   * SdiffD1/3/5/7 are accumulated into A_Sad1357 and SdiffD2/4/6/8
;     into B_Sad2468, so two partial sums are kept.
;
; NOTE(review): several packets consume registers that are only written
; later in the listing (e.g. packet 11 uses A_curD3/A_refMid3, written in
; packets 16/21), so this looks like a software-pipelined kernel whose
; prolog/epilog and the final A_Sad1357 + B_Sad2468 addition live outside
; this excerpt - confirm against the full routine.
; NOTE(review): Bcnt is rewritten each pass by "mpyhu Bcnt,A_cstM1,Bcnt"
; and tested by the conditional branch; how that multiply counts the
; rows depends on the initial values of Bcnt and A_cstM1, which are not
; visible here - confirm against the setup code.
Zsad8h_loop:
ldbu .d1t1 *A_ptrRef++[2],A_refD1 ;11
|| ldbu .d2t2 *B_ptrCur++[2],B_curD6 ;11
|| mpyhu .m2x Bcnt,A_cstM1,Bcnt ;11
|| add .l1 A_SdiffD1,A_Sad1357,A_Sad1357 ;11
|| abs .l2 B_diffD2,B_SdiffD2 ;11
|| sub .s1 A_curD3,A_refMid3,A_diffD3 ;11
|| shr .s2 B_refD45r,1,B_refMid4 ;11
; Packet 12: A_ptrCur is post-incremented by A_CurAdjust after its last
; load of the row (curD7) - presumably this steps to the next row; verify.
ldbu .d1t1 *A_ptrCur++[A_CurAdjust],A_curD7 ;12
|| ldbu .d2t2 *B_ptrRef++[2],B_refD2 ;12
|| abs .l1 A_diffD3,A_SdiffD3 ;12
|| add .l2 B_SdiffD2,B_Sad2468,B_Sad2468 ;12
|| add .s1 A_refD5,A_refD6,A_refD56 ;12
|| sub .s2 B_curD4,B_refMid4,B_diffD4 ;12
; Packet 13: same row-end adjustment for the B-side current pointer.
ldbu .d1t1 *A_ptrRef++[2],A_refD3 ;13
|| ldbu .d2t2 *B_ptrCur++[B_CurAdjust],B_curD8 ;13
|| add .l1 A_SdiffD3,A_Sad1357,A_Sad1357 ;13
|| abs .l2 B_diffD4,B_SdiffD4 ;13
|| add .s1 A_refD56,A_qround,A_refD56r ;13
|| add .s2x B_refD7,A_refD6,B_refD67 ;13
ldbu .d1t1 *A_ptrCur++[2],A_curD1 ;14
|| ldbu .d2t2 *B_ptrRef++[3],B_refD4 ;14
|| add .l1x B_refD7,A_refD8,A_refD78 ;14
|| add .l2 B_SdiffD4,B_Sad2468,B_Sad2468 ;14
|| shr .s1 A_refD56r,1,A_refMid5 ;14
|| add .s2x B_refD67,A_qround,B_refD67r ;14
ldbu .d1t1 *A_ptrRef++[1],A_refD5 ;15
|| ldbu .d2t2 *B_ptrCur++[2],B_curD2 ;15
|| add .l1 A_refD78,A_qround,A_refD78r ;15
|| add .l2x B_refD9,A_refD8,B_refD89 ;15
|| sub .s1 A_curD5,A_refMid5,A_diffD5 ;15
|| shr .s2 B_refD67r,1,B_refMid6 ;15
; Packet 16 issues the loop branch; the five packets 17..21 that follow
; are still part of the kernel (consistent with the C6000 branch delay
; slots - see the TI instruction set reference).
ldbu .d1t1 *A_ptrCur++[2],A_curD3 ;16
|| sub .d2 B_curD6,B_refMid6,B_diffD6 ;16
|| abs .l1 A_diffD5,A_SdiffD5 ;16
|| add .l2x B_refD89,A_qround,B_refD89r ;16
|| shr .s1 A_refD78r,1,A_refMid7 ;16
||[Bcnt]b .s2 Zsad8h_loop ;16
ldbu .d2t2 *B_ptrCur++[2],B_curD4 ;17
|| add .l1x A_refD1,B_refD2,A_refD12 ;17
|| sub .d1 A_curD7,A_refMid7,A_diffD7 ;17
|| abs .l2 B_diffD6,B_SdiffD6 ;17
|| add .s1 A_SdiffD5,A_Sad1357,A_Sad1357 ;17
|| shr .s2 B_refD89r,1,B_refMid8 ;17
ldbu .d1t1 *A_ptrRef++[2],A_refD6 ;18
|| add .l2x B_refD2,A_refD3,B_refD23 ;18
|| add .s1 A_refD12,A_qround,A_refD12r ;18
|| sub .d2 B_curD8,B_refMid8,B_diffD8 ;18
|| abs .l1 A_diffD7,A_SdiffD7 ;18
|| add .s2 B_SdiffD6,B_Sad2468,B_Sad2468 ;18
ldbu .d2t2 *B_ptrRef++[2],B_refD7 ;19
|| add .l1x A_refD3,B_refD4,A_refD34 ;19
|| shr .s1 A_refD12r,1,A_refMid1 ;19
|| add .s2x B_refD23,A_qround,B_refD23r ;19
|| add .d1 A_SdiffD7,A_Sad1357,A_Sad1357 ;19
|| abs .l2 B_diffD8,B_SdiffD8 ;19
; Packets 20/21: last reference loads of the row on each side (refD8,
; refD9), post-incremented by A_RefAdjust / B_RefAdjust - presumably the
; step to the next reference row; verify against the setup code.
ldbu .d1t1 *A_ptrRef++[A_RefAdjust],A_refD8 ;20
|| sub .l1 A_curD1,A_refMid1,A_diffD1 ;20
|| add .l2x A_refD5,B_refD4,B_refD45 ;20
|| add .s1 A_refD34,A_qround,A_refD34r ;20
|| shr .s2 B_refD23r,1,B_refMid2 ;20
|| add .d2 B_SdiffD8,B_Sad2468,B_Sad2468 ;20
ldbu .d1t1 *A_ptrCur++[2],A_curD5 ;21
|| ldbu .d2t2 *B_ptrRef++[B_RefAdjust],B_refD9 ;21
|| abs .l1 A_diffD1,A_SdiffD1 ;21
|| sub .l2 B_curD2,B_refMid2,B_diffD2 ;21
|| shr .s1 A_refD34r,1,A_refMid3 ;21
|| add .s2x B_refD45,A_qround,B_refD45r ;21
循环内核,希望有所启发
共10条
1/1 1 跳转至页
回复
有奖活动 | |
---|---|
【有奖活动】分享技术经验,兑换京东卡 | |
话不多说,快进群! | |
请大声喊出:我要开发板! | |
【有奖活动】EEPW网站征稿正在进行时,欢迎踊跃投稿啦 | |
奖!发布技术笔记,技术评测贴换取您心仪的礼品 | |
打赏了!打赏了!打赏了! |
打赏帖 | |
---|---|
与电子爱好者谈读图二被打赏50分 | |
【FRDM-MCXN947评测】Core1适配运行FreeRtos被打赏50分 | |
【FRDM-MCXN947评测】双核调试被打赏50分 | |
【CPKCORRA8D1B评测】---移植CoreMark被打赏50分 | |
【CPKCORRA8D1B评测】---打开硬件定时器被打赏50分 | |
【FRDM-MCXA156评测】4、CAN loopback模式测试被打赏50分 | |
【CPKcorRA8D1评测】--搭建初始环境被打赏50分 | |
【FRDM-MCXA156评测】3、使用FlexIO模拟UART被打赏50分 | |
【FRDM-MCXA156评测】2、rt-thread MCXA156 BSP制作被打赏50分 | |
【FRDM-MCXN947评测】核间通信MUTEX被打赏50分 |