这些小活动你都参加了吗?快来围观一下吧!>>
电子产品世界 » 论坛首页 » 活动中心 » 板卡试用 » 【换取逻辑分析仪】RA8使用PMU对接适配perfcounter

共4条 1/1 1 跳转至

【换取逻辑分析仪】RA8使用PMU对接适配perfcounter

助工
2024-10-20 20:54:41     打赏

简介:

       在之前的帖子(RA8 PMU 模块功能寄存器功能说明)中,我们已经介绍了PMU 相关的寄存器;在另一篇帖子(LVGL DMA2D/CPU搬运数据至framebuff性能比较)中,我们使用对接了systick 的perf_counter 来计算代码运行所占用的cycle 数,这种监测代码运行周期的功能和PMU 的 cycle counter 功能基本是一致的。perf_counter 是按照分层的思想设计的,底层只要适配perf_counter 所依赖的接口即可,并且最新的代码已经提供了对接PMU 的适配代码。下面我们将PMU 对接到perf_counter,需要适配的底层接口如下。

image.png


使用PMU 实现上述perf_counter 所依赖的底层接口函数。

/*============================ PROTOTYPES ====================================*/
/*
 * Low-level interface for porting: the hooks a timer back-end has to
 * implement so that it can be used by perf_counter.
 */

/* frequency of the system timer */
extern uint32_t perfc_port_get_system_timer_freq(void);

/* top (maximum) value of the system timer count */
extern int64_t perfc_port_get_system_timer_top(void);

/* true while a system timer overflow is pending */
extern bool perfc_port_is_system_timer_ovf_pending(void);

/* set up the system timer; returns false on failure */
extern bool perfc_port_init_system_timer(bool bTimerOccupied);

/* current value of the system timer counter */
extern int64_t perfc_port_get_system_timer_elapsed(void);

/* clear the pending system timer overflow */
extern void perfc_port_clear_system_timer_ovf_pending(void);

/* stop the system timer */
extern void perfc_port_stop_system_timer_counting(void);

/* clear the system timer counter */
extern void perfc_port_clear_system_timer_counter(void);

/*
 * Configure the PMU so that its cycle counter serves as the perf_counter
 * system timer, and set up the PMU event counters.
 *
 * bIsTimeOccupied  Not used by this port (only passed to UNUSED_PARAM).
 *
 * Returns false when PMU->TYPE does not have PMU_TYPE_CYCCNT_PRESENT_Msk set
 * (nothing is touched in that case); returns true after the configuration
 * sequence below has run.
 *
 * NOTE(review): the register sequence is order-sensitive; __IRQ_SAFE presumably
 * runs the enclosed block with interrupts masked -- confirm against the
 * perf_counter macro definition.
 */
bool perfc_port_init_system_timer(bool bIsTimeOccupied)
{
    UNUSED_PARAM(bIsTimeOccupied);

    /* give up if the PMU type register does not report a cycle counter */
    if (!(PMU->TYPE & PMU_TYPE_CYCCNT_PRESENT_Msk)) {
        return false;
    }

    __IRQ_SAFE {

        /* clear the PMU enable bit while the counters are reconfigured */
        PMU->CTRL &= ~PMU_CTRL_ENABLE_Msk;

        perfc_port_stop_system_timer_counting();
        
        /* disable PMU Cycle Counter interrupt */
        PMU->INTENCLR = PMU_INTENCLR_CYCCNT_ENABLE_Msk;

        perfc_port_clear_system_timer_counter();
        perfc_port_clear_system_timer_ovf_pending();

        /* reset all event counter */
        PMU->CTRL |= PMU_CTRL_EVENTCNT_RESET_Msk;

        /* configure event counter */
        do {
            /* number of event counters to set up: the NUM_CNTS field of
             * PMU->TYPE, capped at __PMU_NUM_EVENTCNT
             */
            uint_fast8_t chCounter = PMU->TYPE & PMU_TYPE_NUM_CNTS_Msk;
            chCounter = MIN(chCounter, __PMU_NUM_EVENTCNT);
            
            if (chCounter >= 2) {
                /* 32 bit counter for instruction architecturally executed:
                 * counter 0 counts the event, counter 1 is set to CHAIN
                 */
                PMU->EVTYPER[0] = ARM_PMU_INST_RETIRED;
                PMU->EVTYPER[1] = ARM_PMU_CHAIN;

                /* clear counter 0/1 overflow flag */
                PMU->OVSCLR = PMU_OVSCLR_CNT0_STATUS_Msk
                            | PMU_OVSCLR_CNT1_STATUS_Msk;

                /* enable counter 1 interrupt */
                PMU->INTENSET = PMU_INTENSET_CNT1_ENABLE_Msk;

                /* enable counter 0/1 */
                PMU->CNTENSET = PMU_CNTENSET_CNT0_ENABLE_Msk
                              | PMU_CNTENSET_CNT1_ENABLE_Msk;
            }

            if (chCounter >= 4) {
                /* 32bit counter for all Data memory Accesses:
                 * counter 2 counts the event, counter 3 is set to CHAIN
                 */
                PMU->EVTYPER[2] = ARM_PMU_MEM_ACCESS;
                PMU->EVTYPER[3] = ARM_PMU_CHAIN;

                /* clear counter 2/3 overflow flag */
                PMU->OVSCLR = PMU_OVSCLR_CNT2_STATUS_Msk
                            | PMU_OVSCLR_CNT3_STATUS_Msk;

                /* enable counter 3 interrupt */
                PMU->INTENSET = PMU_INTENSET_CNT3_ENABLE_Msk;

                /* enable counter 2/3 */
                PMU->CNTENSET = PMU_CNTENSET_CNT2_ENABLE_Msk
                              | PMU_CNTENSET_CNT3_ENABLE_Msk;
            }

            if (chCounter >= 6) {
                /* 32bit counter for L1 data cache refills:
                 * counter 4 counts the event, counter 5 is set to CHAIN
                 */
                PMU->EVTYPER[4] = ARM_PMU_L1D_CACHE_REFILL;
                PMU->EVTYPER[5] = ARM_PMU_CHAIN;

                /* clear counter 4/5 overflow flag */
                PMU->OVSCLR = PMU_OVSCLR_CNT4_STATUS_Msk
                            | PMU_OVSCLR_CNT5_STATUS_Msk;

                /* enable counter 5 interrupt */
                PMU->INTENSET = PMU_INTENSET_CNT5_ENABLE_Msk;

                /* enable counter 4/5 */
                PMU->CNTENSET = PMU_CNTENSET_CNT4_ENABLE_Msk
                              | PMU_CNTENSET_CNT5_ENABLE_Msk;

            }

            /* remaining counters (index 6 and up): no event type is
             * programmed here, they are only cleared and enabled
             */
            if (chCounter > 6) {
                for (uint_fast8_t n = 6; n < chCounter; n++) {
                    /* NOTE(review): (1<<n) is a signed int shift; that is
                     * only well defined while n stays below 31
                     */
                    uint32_t wMask = (1<<n);

                    PMU->OVSCLR = wMask;        /* clear overflow flag */
                    PMU->INTENSET = wMask;      /* enable interrupt */
                    PMU->CNTENSET = wMask;      /* enable counter */
                }
            }

        } while(0);

        /* set the UMON_EN, SDME, TRCENA and MON_EN bits in DEMCR */
        DCB->DEMCR |= DCB_DEMCR_UMON_EN_Msk         |
                      DCB_DEMCR_SDME_Msk            |
                      DCB_DEMCR_TRCENA_Msk          |
                      DCB_DEMCR_MON_EN_Msk          ;

        /* enable PMU Cycle Counter interrupt */
        PMU->INTENSET = PMU_INTENSET_CCYCNT_ENABLE_Msk;

        /* enable the cycle counter, then switch the PMU back on */
        PMU->CNTENSET = PMU_CNTENSET_CCNTR_ENABLE_Msk;
        PMU->CTRL |= PMU_CTRL_ENABLE_Msk;
        
        /* force to disable DWT */
        DWT->CTRL = 0;
    }
    
    return true;
}

uint64_t perfc_pmu_get_instruction_count(void)
{
    uint32_t wHigh16, wLow16;
    uint64_t dwResult;
    bool bIsOverflow = false;
    __IRQ_SAFE {
        do {
            wHigh16 = PMU->EVCNTR[1];
            wLow16 = PMU->EVCNTR[0];
        } while(wHigh16 < PMU->EVCNTR[1]);
        dwResult = s_dwEventCounter[PMU_CNT_INSTRUCTION];
        bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT1_STATUS_Msk));
    }

    dwResult += wLow16 | (wHigh16 << 16);
    
    if (bIsOverflow) {
        dwResult += (uint64_t)1<<32;
    }

    /* force to disable DWT */
    DWT->CTRL = 0;

    return dwResult;
}

uint64_t perfc_pmu_get_memory_access_count(void)
{
    uint32_t wHigh16, wLow16;
    uint64_t dwResult;
    bool bIsOverflow = false;

    __IRQ_SAFE {
        do {
            wHigh16 = PMU->EVCNTR[3];
            wLow16 = PMU->EVCNTR[2];
        } while(wHigh16 < PMU->EVCNTR[3]);
        dwResult = s_dwEventCounter[PMU_CNT_MEM_ACCESS];
        bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT3_STATUS_Msk));
    }

    dwResult += wLow16 | (wHigh16 << 16);
    
    if (bIsOverflow) {
        dwResult += (uint64_t)1<<32;
    }

    /* force to disable DWT */
    DWT->CTRL = 0;

    return dwResult;
}


uint64_t perfc_pmu_get_L1_dcache_refill_count(void)
{
    uint32_t wHigh16, wLow16;
    uint64_t dwResult;
    bool bIsOverflow = false;

    __IRQ_SAFE {
        do {
            wHigh16 = PMU->EVCNTR[5];
            wLow16 = PMU->EVCNTR[4];
        } while(wHigh16 < PMU->EVCNTR[5]);
        dwResult = s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL];
        bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT5_STATUS_Msk));
    }

    dwResult += wLow16 | (wHigh16 << 16);
    
    if (bIsOverflow) {
        dwResult += (uint64_t)1<<32;
    }

    /* force to disable DWT */
    DWT->CTRL = 0;

    return dwResult;
}

/*
 * Report the system timer frequency.
 *
 * This port returns the current value of SystemCoreClock.
 */
uint32_t perfc_port_get_system_timer_freq(void)
{
    extern uint32_t SystemCoreClock;
    const uint32_t wTimerFrequency = SystemCoreClock;

    return wTimerFrequency;
}

/*
 * Tell whether a system timer overflow is pending.
 *
 * Returns true when the cycle counter status bit reads as set in PMU->OVSSET.
 */
bool perfc_port_is_system_timer_ovf_pending(void)
{
    return 0 != (PMU->OVSSET & PMU_OVSSET_CYCCNT_STATUS_Msk);
}

/*
 * Report the top value of the system timer count.
 *
 * Returns 0xFFFFFFFF, the largest 32-bit unsigned value.
 */
int64_t perfc_port_get_system_timer_top(void)
{
    return (int64_t)UINT32_MAX;
}

/*
 * Read the current system timer count.
 *
 * Returns the value of PMU->CCNTR converted to int64_t. The register is read
 * directly; the original kept ARM_PMU_Get_CCNTR() as a commented-out
 * alternative.
 */
int64_t perfc_port_get_system_timer_elapsed(void)
{
    const int64_t lElapsed = (int64_t)PMU->CCNTR;

    return lElapsed;
}

/*
 * Clear the pending system timer overflow.
 *
 * Writes the cycle counter status mask to PMU->OVSCLR.
 */
void perfc_port_clear_system_timer_ovf_pending(void)
{
    PMU->OVSCLR = (PMU_OVSCLR_CYCCNT_STATUS_Msk);
}

/*
 * Stop the system timer.
 *
 * Writes the cycle counter enable mask to PMU->CNTENCLR.
 */
void perfc_port_stop_system_timer_counting(void)
{
    PMU->CNTENCLR = (PMU_CNTENCLR_CCNTR_ENABLE_Msk);
}

/*
 * Clear the system timer counter.
 *
 * Sets the cycle counter reset bit in PMU->CTRL with a read-modify-write,
 * leaving the other control bits as they were read.
 */
void perfc_port_clear_system_timer_counter(void)
{
    PMU->CTRL = PMU->CTRL | PMU_CTRL_CYCCNT_RESET_Msk;
}

将上述PMU 底层实现接口添加到工程,并配置perf_counter 使用PMU 作为服务提供者,配置文件修改如下:

/* available low-level timer back-ends for perf_counter */
#define PERFC_LOW_LEVEL_TYPE_SYSTICK  0
#define PERFC_LOW_LEVEL_TYPE_PMU      1

/*
 * Selected back-end. 0: system tick 1:pmu
 * Set to PERFC_LOW_LEVEL_TYPE_PMU so that the PMU porting macros below are
 * actually defined; with PERFC_LOW_LEVEL_TYPE_SYSTICK the #if block is skipped.
 */
#define PERFC_LOW_LEVEL_TYPE  PERFC_LOW_LEVEL_TYPE_PMU

#if (PERFC_LOW_LEVEL_TYPE_PMU == PERFC_LOW_LEVEL_TYPE)
/* PMU selected: define the SysTick-disable and PMU-port switches and name
 * the PMU port header
 */
#define __PERFC_CFG_DISABLE_DEFAULT_SYSTICK_PORTING__   1
#define __PERFC_USE_PMU_PORTING__   1
#define __PERFC_CFG_PORTING_INCLUDE__   "perfc_port_pmu.h"
#endif

对接完成后添加如下测试代码,测试vTaskDelay(1000) 这段代码执行期间的性能参数。

/*
 * Test fragment: profile one vTaskDelay(1000) call using whichever
 * perf_counter back-end PERFC_LOW_LEVEL_TYPE selects.
 *
 * NOTE(review): both label strings say "delay_us(1000ul)" although the code
 * being measured is vTaskDelay(1000) -- confirm whether the label is intended.
 */
#if (PERFC_LOW_LEVEL_TYPE_PMU == PERFC_LOW_LEVEL_TYPE)
    /* PMU back-end: wrap the call in __cpu_perf__ (presumably this measures
     * the enclosed block and reports the result itself -- confirm against the
     * perf_counter documentation)
     */
    __cpu_perf__("delay_us(1000ul)"){
        vTaskDelay(1000);
    }
#elif (PERFC_LOW_LEVEL_TYPE_SYSTICK == PERFC_LOW_LEVEL_TYPE)
    /* SysTick back-end: measure with the start/stop pair and print the value
     * returned by stop_cycle_counter()
     */
    int32_t iCycleResult = 0;

    start_cycle_counter();
    vTaskDelay(1000);
    iCycleResult = stop_cycle_counter();

    printf("\r\n delay_us(1000ul) takes %d cycles\r\n", (int)iCycleResult);
#endif


执行结果如下,相对于systick 方式,PMU 方式增加了CACHE、内存访问次数及CPI 等性能参数的打印输出。

image.png


专家
2024-10-21 00:14:43     打赏
2楼

感谢楼主分享


专家
2024-10-21 06:32:05     打赏
3楼

看看


工程师
2024-10-21 08:37:22     打赏
4楼

学习了。


共4条 1/1 1 跳转至

回复

匿名不能发帖!请先 [ 登陆 注册 ]