简介:
在之前的帖子(RA8 PMU 模块功能寄存器功能说明)中,我们已经介绍了PMU 相关的寄存器;在之前的帖子(LVGL DMA2D/CPU搬运数据至framebuff性能比较)中,使用对接systick 的perf_counter 来计算代码运行所占用的cycle 数,这种监测代码运行周期的功能和PMU 的 cycle counter 功能基本是一致的。perf counter 是按照分层的思想设计的,底层只要适配perf counter 依赖的接口即可。最新的代码已经适配了对接PMU 的代码,我们将PMU 对接到perf counter,底层只要适配依赖的接口即可,对应接口如下。

使用PMU 实现上述依赖底层实现的接口函数.
/*============================ PROTOTYPES ====================================*/
/*
 * Low-level interface for porting: the hooks a timer back end provides so
 * that it can act as the perf_counter system timer.
 */

/* Frequency of the system timer. */
extern uint32_t perfc_port_get_system_timer_freq(void);

/* Top value of the system timer count. */
extern int64_t perfc_port_get_system_timer_top(void);

/* True while a system timer overflow is pending. */
extern bool perfc_port_is_system_timer_ovf_pending(void);

/* Initialise the system timer; the return value reports success. */
extern bool perfc_port_init_system_timer(bool bTimerOccupied);

/* Current value of the system timer count. */
extern int64_t perfc_port_get_system_timer_elapsed(void);

/* Acknowledge a pending system timer overflow. */
extern void perfc_port_clear_system_timer_ovf_pending(void);

/* Stop the system timer from counting. */
extern void perfc_port_stop_system_timer_counting(void);

/* Reset the system timer count. */
extern void perfc_port_clear_system_timer_counter(void);
/*
 * Configure the PMU so that its cycle counter can serve as the perf_counter
 * system timer, and set up the available event counters.
 *
 * bIsTimeOccupied is accepted for interface compatibility and is not used.
 *
 * Returns false, before any register is modified, when PMU->TYPE does not
 * report a cycle counter; returns true once the sequence below has run.
 */
bool perfc_port_init_system_timer(bool bIsTimeOccupied)
{
UNUSED_PARAM(bIsTimeOccupied);
/* nothing to do when the PMU reports no cycle counter */
if (!(PMU->TYPE & PMU_TYPE_CYCCNT_PRESENT_Msk)) {
return false;
}
/* the whole reconfiguration runs inside the __IRQ_SAFE block
 * (presumably with interrupts masked -- confirm against the macro) */
__IRQ_SAFE {
/* switch the PMU off while it is being reprogrammed */
PMU->CTRL &= ~PMU_CTRL_ENABLE_Msk;
perfc_port_stop_system_timer_counting();
/* disable PMU Cycle Counter interrupt */
PMU->INTENCLR = PMU_INTENCLR_CYCCNT_ENABLE_Msk;
/* start from a zeroed cycle counter with no stale overflow flag */
perfc_port_clear_system_timer_counter();
perfc_port_clear_system_timer_ovf_pending();
/* reset all event counter */
PMU->CTRL |= PMU_CTRL_EVENTCNT_RESET_Msk;
/* configure event counter */
do {
/* usable counters: the number reported by PMU->TYPE, capped at
 * __PMU_NUM_EVENTCNT */
uint_fast8_t chCounter = PMU->TYPE & PMU_TYPE_NUM_CNTS_Msk;
chCounter = MIN(chCounter, __PMU_NUM_EVENTCNT);
if (chCounter >= 2) {
/* counters 0/1: counter 0 counts ARM_PMU_INST_RETIRED and counter 1 is
 * set to ARM_PMU_CHAIN, so the pair is read as one 32-bit value */
PMU->EVTYPER[0] = ARM_PMU_INST_RETIRED;
PMU->EVTYPER[1] = ARM_PMU_CHAIN;
/* clear counter 0/1 overflow flag */
PMU->OVSCLR = PMU_OVSCLR_CNT0_STATUS_Msk
| PMU_OVSCLR_CNT1_STATUS_Msk;
/* enable counter 1 interrupt */
PMU->INTENSET = PMU_INTENSET_CNT1_ENABLE_Msk;
/* enable counter 0/1 */
PMU->CNTENSET = PMU_CNTENSET_CNT0_ENABLE_Msk
| PMU_CNTENSET_CNT1_ENABLE_Msk;
}
if (chCounter >= 4) {
/* counters 2/3: counter 2 counts ARM_PMU_MEM_ACCESS (data memory
 * accesses), counter 3 is set to ARM_PMU_CHAIN */
PMU->EVTYPER[2] = ARM_PMU_MEM_ACCESS;
PMU->EVTYPER[3] = ARM_PMU_CHAIN;
/* clear counter 2/3 overflow flag */
PMU->OVSCLR = PMU_OVSCLR_CNT2_STATUS_Msk
| PMU_OVSCLR_CNT3_STATUS_Msk;
/* enable counter 3 interrupt */
PMU->INTENSET = PMU_INTENSET_CNT3_ENABLE_Msk;
/* enable counter 2/3 */
PMU->CNTENSET = PMU_CNTENSET_CNT2_ENABLE_Msk
| PMU_CNTENSET_CNT3_ENABLE_Msk;
}
if (chCounter >= 6) {
/* counters 4/5: counter 4 counts ARM_PMU_L1D_CACHE_REFILL, counter 5 is
 * set to ARM_PMU_CHAIN */
PMU->EVTYPER[4] = ARM_PMU_L1D_CACHE_REFILL;
PMU->EVTYPER[5] = ARM_PMU_CHAIN;
/* clear counter 4/5 overflow flag */
PMU->OVSCLR = PMU_OVSCLR_CNT4_STATUS_Msk
| PMU_OVSCLR_CNT5_STATUS_Msk;
/* enable counter 5 interrupt */
PMU->INTENSET = PMU_INTENSET_CNT5_ENABLE_Msk;
/* enable counter 4/5 */
PMU->CNTENSET = PMU_CNTENSET_CNT4_ENABLE_Msk
| PMU_CNTENSET_CNT5_ENABLE_Msk;
}
/* counters 6 and up are enabled one by one; no event type is written for
 * them here */
if (chCounter > 6) {
for (uint_fast8_t n = 6; n < chCounter; n++) {
uint32_t wMask = (1<<n);
PMU->OVSCLR = wMask; /* clear overflow flag */
PMU->INTENSET = wMask; /* enable interrupt */
PMU->CNTENSET = wMask; /* enable counter */
}
}
} while(0);
/* set the UMON_EN, SDME, TRCENA and MON_EN bits of DCB->DEMCR.
 * NOTE(review): TRCENA is set only here, after the PMU registers above have
 * already been written -- confirm the PMU accepts those writes while TRCENA
 * is still clear. */
DCB->DEMCR |= DCB_DEMCR_UMON_EN_Msk |
DCB_DEMCR_SDME_Msk |
DCB_DEMCR_TRCENA_Msk |
DCB_DEMCR_MON_EN_Msk ;
/* enable PMU Cycle Counter interrupt */
PMU->INTENSET = PMU_INTENSET_CCYCNT_ENABLE_Msk;
/* enable the cycle counter, then switch the PMU back on */
PMU->CNTENSET = PMU_CNTENSET_CCNTR_ENABLE_Msk;
PMU->CTRL |= PMU_CTRL_ENABLE_Msk;
/* force to disable DWT */
DWT->CTRL = 0;
}
return true;
}
/*
 * Return the 64-bit instruction count.
 *
 * The result is s_dwEventCounter[PMU_CNT_INSTRUCTION] plus a 32-bit hardware
 * value assembled from PMU event counter 0 (low half) and counter 1 (high
 * half). When counter 1's overflow status is pending, 2^32 is added on top
 * (presumably because the accumulator has not been updated for that wrap
 * yet -- the code that maintains s_dwEventCounter is not shown here).
 */
uint64_t perfc_pmu_get_instruction_count(void)
{
    uint32_t wHigh16, wLow16;
    uint64_t dwResult;
    bool bIsOverflow = false;

    __IRQ_SAFE {
        bool bOverflowNow;

        /*
         * Take a consistent snapshot of both halves and the overflow flag.
         * The flag is sampled before and after the counter reads and the
         * high half is re-read at the end; any difference means a carry or a
         * wrap happened in between, so the snapshot is retaken. Comparing
         * with != (not <) also catches the high half wrapping to 0.
         */
        do {
            bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT1_STATUS_Msk));
            wHigh16 = PMU->EVCNTR[1];
            wLow16 = PMU->EVCNTR[0];
            bOverflowNow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT1_STATUS_Msk));
        } while ((wHigh16 != PMU->EVCNTR[1]) || (bIsOverflow != bOverflowNow));

        dwResult = s_dwEventCounter[PMU_CNT_INSTRUCTION];
    }

    dwResult += wLow16 | (wHigh16 << 16);
    if (bIsOverflow) {
        dwResult += (uint64_t)1 << 32;
    }

    /* force to disable DWT */
    DWT->CTRL = 0;

    return dwResult;
}
/*
 * Return the 64-bit memory access count.
 *
 * The result is s_dwEventCounter[PMU_CNT_MEM_ACCESS] plus a 32-bit hardware
 * value assembled from PMU event counter 2 (low half) and counter 3 (high
 * half). When counter 3's overflow status is pending, 2^32 is added on top
 * (presumably because the accumulator has not been updated for that wrap
 * yet -- the code that maintains s_dwEventCounter is not shown here).
 */
uint64_t perfc_pmu_get_memory_access_count(void)
{
    uint32_t wHigh16, wLow16;
    uint64_t dwResult;
    bool bIsOverflow = false;

    __IRQ_SAFE {
        bool bOverflowNow;

        /*
         * Take a consistent snapshot of both halves and the overflow flag.
         * The flag is sampled before and after the counter reads and the
         * high half is re-read at the end; any difference means a carry or a
         * wrap happened in between, so the snapshot is retaken. Comparing
         * with != (not <) also catches the high half wrapping to 0.
         */
        do {
            bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT3_STATUS_Msk));
            wHigh16 = PMU->EVCNTR[3];
            wLow16 = PMU->EVCNTR[2];
            bOverflowNow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT3_STATUS_Msk));
        } while ((wHigh16 != PMU->EVCNTR[3]) || (bIsOverflow != bOverflowNow));

        dwResult = s_dwEventCounter[PMU_CNT_MEM_ACCESS];
    }

    dwResult += wLow16 | (wHigh16 << 16);
    if (bIsOverflow) {
        dwResult += (uint64_t)1 << 32;
    }

    /* force to disable DWT */
    DWT->CTRL = 0;

    return dwResult;
}
/*
 * Return the 64-bit L1 data cache refill count.
 *
 * The result is s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL] plus a 32-bit
 * hardware value assembled from PMU event counter 4 (low half) and counter 5
 * (high half). When counter 5's overflow status is pending, 2^32 is added on
 * top (presumably because the accumulator has not been updated for that wrap
 * yet -- the code that maintains s_dwEventCounter is not shown here).
 */
uint64_t perfc_pmu_get_L1_dcache_refill_count(void)
{
    uint32_t wHigh16, wLow16;
    uint64_t dwResult;
    bool bIsOverflow = false;

    __IRQ_SAFE {
        bool bOverflowNow;

        /*
         * Take a consistent snapshot of both halves and the overflow flag.
         * The flag is sampled before and after the counter reads and the
         * high half is re-read at the end; any difference means a carry or a
         * wrap happened in between, so the snapshot is retaken. Comparing
         * with != (not <) also catches the high half wrapping to 0.
         */
        do {
            bIsOverflow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT5_STATUS_Msk));
            wHigh16 = PMU->EVCNTR[5];
            wLow16 = PMU->EVCNTR[4];
            bOverflowNow = (0 != (PMU->OVSCLR & PMU_OVSCLR_CNT5_STATUS_Msk));
        } while ((wHigh16 != PMU->EVCNTR[5]) || (bIsOverflow != bOverflowNow));

        dwResult = s_dwEventCounter[PMU_CNT_L1_DCACHE_REFILL];
    }

    dwResult += wLow16 | (wHigh16 << 16);
    if (bIsOverflow) {
        dwResult += (uint64_t)1 << 32;
    }

    /* force to disable DWT */
    DWT->CTRL = 0;

    return dwResult;
}
/*
 * Report the system timer frequency.
 *
 * The value is taken from SystemCoreClock, which is only declared here and
 * is defined elsewhere.
 */
uint32_t perfc_port_get_system_timer_freq(void)
{
    extern uint32_t SystemCoreClock;
    const uint32_t wTimerFrequency = SystemCoreClock;

    return wTimerFrequency;
}
/*
 * Tell whether a system timer overflow is pending.
 *
 * Returns true when the cycle counter status bit is set in PMU->OVSSET.
 */
bool perfc_port_is_system_timer_ovf_pending(void)
{
    const bool bPending =
        (0 != (PMU->OVSSET & PMU_OVSSET_CYCCNT_STATUS_Msk));

    return bPending;
}
/*
 * Report the top value of the system timer count: the largest value that
 * fits in 32 bits (0xFFFFFFFF).
 */
int64_t perfc_port_get_system_timer_top(void)
{
    const int64_t lTop = (int64_t)UINT32_MAX;

    return lTop;
}
/*
 * Report the current system timer count, read from the PMU cycle counter
 * register (PMU->CCNTR).
 */
int64_t perfc_port_get_system_timer_elapsed(void)
{
    const int64_t lElapsed = (int64_t)PMU->CCNTR;

    return lElapsed;
}
/*
 * Acknowledge a pending system timer overflow by writing the cycle counter
 * status bit to PMU->OVSCLR.
 */
void perfc_port_clear_system_timer_ovf_pending(void)
{
    const uint32_t wCycleOverflowBit = PMU_OVSCLR_CYCCNT_STATUS_Msk;

    PMU->OVSCLR = wCycleOverflowBit;
}
/*
 * Stop the system timer by writing the cycle counter enable bit to
 * PMU->CNTENCLR.
 */
void perfc_port_stop_system_timer_counting(void)
{
    const uint32_t wCycleCounterBit = PMU_CNTENCLR_CCNTR_ENABLE_Msk;

    PMU->CNTENCLR = wCycleCounterBit;
}
/*
 * Clear the system timer count by setting the cycle counter reset bit in
 * PMU->CTRL; the other control bits are written back as read.
 */
void perfc_port_clear_system_timer_counter(void)
{
    PMU->CTRL = PMU->CTRL | PMU_CTRL_CYCCNT_RESET_Msk;
}
将上述PMU 底层实现接口添加到工程,并配置perf_counter 使用PMU 作为服务提供者,配置文件修改如下:
/* Selectable low-level timer back ends. */
#define PERFC_LOW_LEVEL_TYPE_SYSTICK    0
#define PERFC_LOW_LEVEL_TYPE_PMU        1

/*
 * Back end in use -- 0: system tick, 1: pmu.
 * NOTE(review): the accompanying text describes selecting the PMU, but this
 * line selects SysTick, which leaves the #if block below inactive -- confirm
 * which value is intended.
 */
#define PERFC_LOW_LEVEL_TYPE            PERFC_LOW_LEVEL_TYPE_SYSTICK

/* Settings that apply only when the PMU back end is selected. */
#if (PERFC_LOW_LEVEL_TYPE_PMU == PERFC_LOW_LEVEL_TYPE)
#   define __PERFC_CFG_DISABLE_DEFAULT_SYSTICK_PORTING__    1
#   define __PERFC_USE_PMU_PORTING__                        1
#   define __PERFC_CFG_PORTING_INCLUDE__                    "perfc_port_pmu.h"
#endif
对接完成后添加如下测试代码,测试vTaskDelay(1000) 这段代码执行期间的性能参数。
#if (PERFC_LOW_LEVEL_TYPE_PMU == PERFC_LOW_LEVEL_TYPE)
/* measure cycles and store it in a dedicated variable without printf */
__cpu_perf__("delay_us(1000ul)"){
vTaskDelay(1000);
}
#elif (PERFC_LOW_LEVEL_TYPE_SYSTICK == PERFC_LOW_LEVEL_TYPE)
int32_t iCycleResult = 0;
start_cycle_counter();
vTaskDelay(1000);
iCycleResult = stop_cycle_counter();
printf("\r\n delay_us(1000ul) takes %d cycles\r\n", (int)iCycleResult);
#endif执行结果如下,相对systick 的功能PMU 增加了CACHE 和 内存访问次数及CPI 的性能参数的打印输出。

我要赚赏金
