简介:
在之前的帖子(RA8 PMU 模块功能寄存器功能说明)中,我们已经介绍了 PMU 相关的寄存器;在之前的帖子(LVGL DMA2D/CPU搬运数据至framebuff性能比较)中,我们使用对接 systick 的 perf_counter 来计算代码运行所占用的 cycle counter。这种监测代码运行周期的功能和 PMU 的 cycle counter 功能基本是一致的。perf counter 是按照分层的思想设计的,底层只要适配 perf counter 依赖的接口即可,并且最新的代码已经适配了对接 PMU 的代码。我们将 PMU 对接到 perf counter 时,底层只要适配依赖的接口即可,对应接口如下。
使用PMU 实现上述依赖底层实现的接口函数.
/*============================ PROTOTYPES ====================================*/ /* low level interface for porting */ extern uint32_t perfc_port_get_system_timer_freq(void); extern int64_t perfc_port_get_system_timer_top(void); extern bool perfc_port_is_system_timer_ovf_pending(void); extern bool perfc_port_init_system_timer(bool bTimerOccupied); extern int64_t perfc_port_get_system_timer_elapsed(void); extern void perfc_port_clear_system_timer_ovf_pending(void); extern void perfc_port_stop_system_timer_counting(void); extern void perfc_port_clear_system_timer_counter(void); bool perfc_port_init_system_timer(bool bIsTimeOccupied) { UNUSED_PARAM(bIsTimeOccupied); if (!(PMU->TYPE & PMU_TYPE_CYCCNT_PRESENT_Msk)) { return false; } __IRQ_SAFE { PMU->CTRL &= ~PMU_CTRL_ENABLE_Msk; perfc_port_stop_system_timer_counting(); /* disable PMU Cycle Counter interrupt */ PMU->INTENCLR = PMU_INTENCLR_CYCCNT_ENABLE_Msk; perfc_port_clear_system_timer_counter(); perfc_port_clear_system_timer_ovf_pending(); /* reset all event counter */ PMU->CTRL |= PMU_CTRL_EVENTCNT_RESET_Msk; /* configure event counter */ do { uint_fast8_t chCounter = PMU->TYPE & PMU_TYPE_NUM_CNTS_Msk; chCounter = MIN(chCounter, __PMU_NUM_EVENTCNT); if (chCounter >= 2) { /* 32 bit counter for instruction architecturally executed */ PMU->EVTYPER[0] = ARM_PMU_INST_RETIRED; PMU->EVTYPER[1] = ARM_PMU_CHAIN; /* clear counter 0/1 overflow flag */ PMU->OVSCLR = PMU_OVSCLR_CNT0_STATUS_Msk | PMU_OVSCLR_CNT1_STATUS_Msk; /* enable counter 1 interrupt */ PMU->INTENSET = PMU_INTENSET_CNT1_ENABLE_Msk; /* enable counter 0/1 */ PMU->CNTENSET = PMU_CNTENSET_CNT0_ENABLE_Msk | PMU_CNTENSET_CNT1_ENABLE_Msk; } if (chCounter >= 4) { /* 32bit counter for all Data memory Accesses */ PMU->EVTYPER[2] = ARM_PMU_MEM_ACCESS; PMU->EVTYPER[3] = ARM_PMU_CHAIN; /* clear counter 2/3 overflow flag */ PMU->OVSCLR = PMU_OVSCLR_CNT2_STATUS_Msk | PMU_OVSCLR_CNT3_STATUS_Msk; /* enable counter 3 interrupt */ PMU->INTENSET = PMU_INTENSET_CNT3_ENABLE_Msk; /* 
enable counter 2/3 */ PMU->CNTENSET = PMU_CNTENSET_CNT2_ENABLE_Msk | PMU_CNTENSET_CNT3_ENABLE_Msk; } if (chCounter >= 6) { /* 32bit counter for all Data memory Accesses */ PMU->EVTYPER[4] = ARM_PMU_L1D_CACHE_REFILL; PMU->EVTYPER[5] = ARM_PMU_CHAIN; /* clear counter 4/5 overflow flag */ PMU->OVSCLR = PMU_OVSCLR_CNT4_STATUS_Msk | PMU_OVSCLR_CNT5_STATUS_Msk; /* enable counter 5 interrupt */ PMU->INTENSET = PMU_INTENSET_CNT5_ENABLE_Msk; /* enable counter 4/5 */ PMU->CNTENSET = PMU_CNTENSET_CNT4_ENABLE_Msk | PMU_CNTENSET_CNT5_ENABLE_Msk; } if (chCounter > 6) { for (uint_fast8_t n = 6; n < chCounter; n++) { uint32_t wMask = (1<<n); PMU->OVSCLR = wMask; /* clear overflow flag */ PMU->INTENSET = wMask; /* enable interrupt */ PMU->CNTENSET = wMask; /* enable counter */ } } } while(0); DCB->DEMCR |= DCB_DEMCR_UMON_EN_Msk | DCB_DEMCR_SDME_Msk | DCB_DEMCR_TRCENA_Msk | DCB_DEMCR_MON_EN_Msk ; /* enable PMU Cycle Counter interrupt */ PMU->INTENSET = PMU_INTENSET_CCYCNT_ENABLE_Msk; PMU->CNTENSET = PMU_CNTENSET_CCNTR_ENABLE_Msk; PMU->CTRL |= PMU_CTRL_ENABLE_Msk; /* force to disable DWT */ DWT->CTRL = 0; } return true; } uint64_t perfc_pmu_get_instruction_count(void) { uint32_t wHigh16, wLow16; uint64_t dwResult; bool bIsOverflow = false; __IRQ_SAFE { do { wHigh16 = PMU->EVCNTR[1]; wLow16 = PMU->EVCNTR[0]; } while(wHigh16 < PMU->EVCNTR[1]); dwResult = s_dwEventCounter[PMU_CNT_INSTRUCTION]; bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT1_STATUS_Msk)); } dwResult += wLow16 | (wHigh16 << 16); if (bIsOverflow) { dwResult += (uint64_t)1<<32; } /* force to disable DWT */ DWT->CTRL = 0; return dwResult; } uint64_t perfc_pmu_get_memory_access_count(void) { uint32_t wHigh16, wLow16; uint64_t dwResult; bool bIsOverflow = false; __IRQ_SAFE { do { wHigh16 = PMU->EVCNTR[3]; wLow16 = PMU->EVCNTR[2]; } while(wHigh16 < PMU->EVCNTR[3]); dwResult = s_dwEventCounter[PMU_CNT_MEM_ACCESS]; bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT3_STATUS_Msk)); } dwResult += wLow16 | (wHigh16 << 16); if 
(bIsOverflow) { dwResult += (uint64_t)1<<32; } /* force to disable DWT */ DWT->CTRL = 0; return dwResult; } uint64_t perfc_pmu_get_L1_dcache_refill_count(void) { uint32_t wHigh16, wLow16; uint64_t dwResult; bool bIsOverflow = false; __IRQ_SAFE { do { wHigh16 = PMU->EVCNTR[5]; wLow16 = PMU->EVCNTR[4]; } while(wHigh16 < PMU->EVCNTR[5]); dwResult = s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL]; bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT5_STATUS_Msk)); } dwResult += wLow16 | (wHigh16 << 16); if (bIsOverflow) { dwResult += (uint64_t)1<<32; } /* force to disable DWT */ DWT->CTRL = 0; return dwResult; } uint32_t perfc_port_get_system_timer_freq(void) { extern uint32_t SystemCoreClock; /* return the system timer frequency */ return SystemCoreClock; } bool perfc_port_is_system_timer_ovf_pending(void) { /* whether the system timer overflow is pending */ return PMU->OVSSET & PMU_OVSSET_CYCCNT_STATUS_Msk; } int64_t perfc_port_get_system_timer_top(void) { /* the top value of the counting */ return 0xFFFFFFFF; } int64_t perfc_port_get_system_timer_elapsed(void) { return (int64_t)PMU->CCNTR;//ARM_PMU_Get_CCNTR(); } void perfc_port_clear_system_timer_ovf_pending(void) { PMU->OVSCLR = PMU_OVSCLR_CYCCNT_STATUS_Msk; } void perfc_port_stop_system_timer_counting(void) { /* stop the system timer */ PMU->CNTENCLR = PMU_CNTENCLR_CCNTR_ENABLE_Msk; } void perfc_port_clear_system_timer_counter(void) { /* clear the system timer counter */ PMU->CTRL |= PMU_CTRL_CYCCNT_RESET_Msk; }
将上述PMU 底层实现接口添加到工程,并配置perf_counter 使用PMU 作为服务提供者,配置文件修改如下:
/* Available low-level time bases for perf_counter. */
#define PERFC_LOW_LEVEL_TYPE_SYSTICK    0
#define PERFC_LOW_LEVEL_TYPE_PMU        1

/* Selected time base (0: system tick, 1: PMU).
 * Set to the PMU so that the PMU porting macros below take effect; switch
 * back to PERFC_LOW_LEVEL_TYPE_SYSTICK to use the default SysTick port.
 */
#define PERFC_LOW_LEVEL_TYPE            PERFC_LOW_LEVEL_TYPE_PMU

#if (PERFC_LOW_LEVEL_TYPE_PMU == PERFC_LOW_LEVEL_TYPE)
/* turn off the default SysTick port and pull in the PMU port header */
#   define __PERFC_CFG_DISABLE_DEFAULT_SYSTICK_PORTING__    1
#   define __PERFC_USE_PMU_PORTING__                        1
#   define __PERFC_CFG_PORTING_INCLUDE__                    "perfc_port_pmu.h"
#endif
对接完成后添加如下测试代码,测试 vTaskDelay(1000) 这段代码执行期间的性能参数。
#if (PERFC_LOW_LEVEL_TYPE_PMU == PERFC_LOW_LEVEL_TYPE) /* measure cycles and store it in a dedicated variable without printf */ __cpu_perf__("delay_us(1000ul)"){ vTaskDelay(1000); } #elif (PERFC_LOW_LEVEL_TYPE_SYSTICK == PERFC_LOW_LEVEL_TYPE) int32_t iCycleResult = 0; start_cycle_counter(); vTaskDelay(1000); iCycleResult = stop_cycle_counter(); printf("\r\n delay_us(1000ul) takes %d cycles\r\n", (int)iCycleResult); #endif
执行结果如下。相对于 systick,PMU 增加了 CACHE、内存访问次数及 CPI 等性能参数的打印输出。