git.itanic.dy.fi Git - linux-stable/commitdiff
perf/x86/ibs: Set mem_lvl_num, mem_remote and mem_hops for data_src
author: Namhyung Kim <namhyung@kernel.org>
Tue, 25 Jul 2023 15:02:06 +0000 (20:32 +0530)
committer: Peter Zijlstra <peterz@infradead.org>
Wed, 26 Jul 2023 10:28:45 +0000 (12:28 +0200)
The kernel IBS driver wasn't using the new PERF_MEM_* APIs due to some of
their limitations. Mainly:

1. mem_lvl_num doesn't allow setting multiple sources whereas the old API
   allows it. Setting multiple data sources is useful because IBS on
   pre-zen4 uarch doesn't provide fine-grained DataSrc details (though
   there is only one such DataSrc, 2h).
2. perf mem sorting logic (sort__lvl_cmp()) ignores mem_lvl_num. perf
   c2c (c2c_decode_stats()) does not use mem_lvl_num at all.

The 1st one can be handled using ANY_CACHE with HOPS_0. The 2nd is purely
a perf-tool-specific issue and should be fixed separately.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230725150206.184-4-ravi.bangoria@amd.com
arch/x86/events/amd/ibs.c

index 7d29be0a279bb0885065322ed532dfe53ef75688..6911c5399d02f359bac08f545f54b0e35b59752f 100644 (file)
@@ -728,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
        return op_data2->data_src_lo;
 }
 
-static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
-                                union ibs_op_data3 *op_data3,
-                                struct perf_sample_data *data)
+#define        L(x)            (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
+#define        LN(x)           PERF_MEM_S(LVLNUM, x)
+#define        REM             PERF_MEM_S(REMOTE, REMOTE)
+#define        HOPS(x)         PERF_MEM_S(HOPS, x)
+
+static u64 g_data_src[8] = {
+       [IBS_DATA_SRC_LOC_CACHE]          = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
+       [IBS_DATA_SRC_DRAM]               = L(LOC_RAM) | LN(RAM),
+       [IBS_DATA_SRC_REM_CACHE]          = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+       [IBS_DATA_SRC_IO]                 = L(IO) | LN(IO),
+};
+
+#define RMT_NODE_BITS                  (1 << IBS_DATA_SRC_DRAM)
+#define RMT_NODE_APPLICABLE(x)         (RMT_NODE_BITS & (1 << x))
+
+static u64 g_zen4_data_src[32] = {
+       [IBS_DATA_SRC_EXT_LOC_CACHE]      = L(L3) | LN(L3),
+       [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
+       [IBS_DATA_SRC_EXT_DRAM]           = L(LOC_RAM) | LN(RAM),
+       [IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+       [IBS_DATA_SRC_EXT_PMEM]           = LN(PMEM),
+       [IBS_DATA_SRC_EXT_IO]             = L(IO) | LN(IO),
+       [IBS_DATA_SRC_EXT_EXT_MEM]        = LN(CXL),
+};
+
+#define ZEN4_RMT_NODE_BITS             ((1 << IBS_DATA_SRC_EXT_DRAM) | \
+                                        (1 << IBS_DATA_SRC_EXT_PMEM) | \
+                                        (1 << IBS_DATA_SRC_EXT_EXT_MEM))
+#define ZEN4_RMT_NODE_APPLICABLE(x)    (ZEN4_RMT_NODE_BITS & (1 << x))
+
+static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+                                 union ibs_op_data3 *op_data3,
+                                 struct perf_sample_data *data)
 {
        union perf_mem_data_src *data_src = &data->data_src;
        u8 ibs_data_src = perf_ibs_data_src(op_data2);
 
        data_src->mem_lvl = 0;
+       data_src->mem_lvl_num = 0;
 
        /*
         * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
         * memory accesses. So, check DcUcMemAcc bit early.
         */
-       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
-               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
-               return;
-       }
+       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
+               return L(UNC) | LN(UNC);
 
        /* L1 Hit */
-       if (op_data3->dc_miss == 0) {
-               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
-               return;
-       }
+       if (op_data3->dc_miss == 0)
+               return L(L1) | LN(L1);
 
        /* L2 Hit */
        if (op_data3->l2_miss == 0) {
                /* Erratum #1293 */
                if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
-                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
-                       data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
-                       return;
-               }
+                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
+                       return L(L2) | LN(L2);
        }
 
        /*
@@ -769,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
        if (data_src->mem_op != PERF_MEM_OP_LOAD)
                goto check_mab;
 
-       /* L3 Hit */
        if (ibs_caps & IBS_CAPS_ZEN4) {
-               if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
-                       return;
-               }
-       } else {
-               if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
-                                           PERF_MEM_LVL_HIT;
-                       return;
-               }
-       }
+               u64 val = g_zen4_data_src[ibs_data_src];
 
-       /* A peer cache in a near CCX */
-       if (ibs_caps & IBS_CAPS_ZEN4 &&
-           ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
-               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
-               return;
-       }
+               if (!val)
+                       goto check_mab;
 
-       /* A peer cache in a far CCX */
-       if (ibs_caps & IBS_CAPS_ZEN4) {
-               if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-                       return;
-               }
-       } else {
-               if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-                       return;
+               /* HOPS_1 because IBS doesn't provide remote socket detail */
+               if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
+                       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
+                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+                       else
+                               val |= REM | HOPS(1);
                }
-       }
 
-       /* DRAM */
-       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
-               if (op_data2->rmt_node == 0)
-                       data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
-               else
-                       data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
-               return;
-       }
+               return val;
+       } else {
+               u64 val = g_data_src[ibs_data_src];
 
-       /* PMEM */
-       if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
-               data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
-               if (op_data2->rmt_node) {
-                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-                       /* IBS doesn't provide Remote socket detail */
-                       data_src->mem_hops = PERF_MEM_HOPS_1;
-               }
-               return;
-       }
+               if (!val)
+                       goto check_mab;
 
-       /* Extension Memory */
-       if (ibs_caps & IBS_CAPS_ZEN4 &&
-           ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
-               data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
-               if (op_data2->rmt_node) {
-                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-                       /* IBS doesn't provide Remote socket detail */
-                       data_src->mem_hops = PERF_MEM_HOPS_1;
+               /* HOPS_1 because IBS doesn't provide remote socket detail */
+               if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
+                       if (ibs_data_src == IBS_DATA_SRC_DRAM)
+                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+                       else
+                               val |= REM | HOPS(1);
                }
-               return;
-       }
 
-       /* IO */
-       if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
-               data_src->mem_lvl = PERF_MEM_LVL_IO;
-               data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
-               if (op_data2->rmt_node) {
-                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-                       /* IBS doesn't provide Remote socket detail */
-                       data_src->mem_hops = PERF_MEM_HOPS_1;
-               }
-               return;
+               return val;
        }
 
 check_mab:
@@ -855,12 +834,11 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
         * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
         * MAB only when IBS fails to provide DataSrc.
         */
-       if (op_data3->dc_miss_no_mab_alloc) {
-               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
-               return;
-       }
+       if (op_data3->dc_miss_no_mab_alloc)
+               return L(LFB) | LN(LFB);
 
-       data_src->mem_lvl = PERF_MEM_LVL_NA;
+       /* Don't set HIT with NA */
+       return PERF_MEM_S(LVL, NA) | LN(NA);
 }
 
 static bool perf_ibs_cache_hit_st_valid(void)
@@ -950,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
                                  union ibs_op_data2 *op_data2,
                                  union ibs_op_data3 *op_data3)
 {
-       perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+       union perf_mem_data_src *data_src = &data->data_src;
+
+       data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
        perf_ibs_get_mem_snoop(op_data2, data);
        perf_ibs_get_tlb_lvl(op_data3, data);
        perf_ibs_get_mem_lock(op_data3, data);