From 06afa4a49a7ae2e8b038c82487b7d85761f8d07b Mon Sep 17 00:00:00 2001 From: Treece Burgess Date: Tue, 3 Sep 2024 14:52:15 +0000 Subject: [PATCH] Update libpfm4 Current with commit 0d799b5546477a46b3a52310bbf1884d56e9e37f Author: Stephane Eranian Date: Mon Sep 2 21:51:03 2024 -0700 update Intel GraniteRapids core PMU to 1.03 Updates the Intel GraniteRapids core PMU event table to latest Intel released version: Date : 08/19/2024 Version: 1.03 From github.com/Intel/perfmon Signed-off-by: Stephane Eranian update Intel GraniteRapids core PMU event table Update to upstream version 1.03 Note: Unable to test Intel Granite Rapids updates due to the PAPI team not having access to a machine with Granite Rapids. --- src/libpfm4/lib/events/intel_gnr_events.h | 176 +++++++++++++++++++++- 1 file changed, 171 insertions(+), 5 deletions(-) diff --git a/src/libpfm4/lib/events/intel_gnr_events.h b/src/libpfm4/lib/events/intel_gnr_events.h index 0f1e4ec73..703b3fad7 100644 --- a/src/libpfm4/lib/events/intel_gnr_events.h +++ b/src/libpfm4/lib/events/intel_gnr_events.h @@ -22,8 +22,8 @@ * applications on Linux. 
* * PMU: gnr (GraniteRapids) - * Based on Intel JSON event table version : 1.02 - * Based on Intel JSON event table published : 05/10/2024 + * Based on Intel JSON event table version : 1.03 + * Based on Intel JSON event table published : 08/19/2024 */ static const intel_x86_umask_t intel_gnr_arith[]={ @@ -1471,6 +1471,7 @@ static const intel_x86_umask_t intel_gnr_mem_trans_retired[]={ { .uname = "LOAD_LATENCY_GT_16", .udesc = "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", .ucode = 0x100ull, + .uequiv = "LOAD_LATENCY:ldlat=16", .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, }, { .uname = "LOAD_LATENCY_GT_2048", @@ -1597,6 +1598,11 @@ static const intel_x86_umask_t intel_gnr_ocr[]={ .ucode = 0x10003c000100ull, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "DEMAND_DATA_RD_L3_HIT_SNOOP_HIT_NO_FWD", + .udesc = "Counts demand data reads that resulted in a snoop that hit in another core, which did not forward the data.", + .ucode = 0x4003c000100ull, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "DEMAND_DATA_RD_L3_HIT_SNOOP_HIT_WITH_FWD", .udesc = "Counts demand data reads that resulted in a snoop hit in another core's caches which forwarded the unmodified data to the requesting core.", .ucode = 0x8003c000100ull, @@ -1607,6 +1613,21 @@ static const intel_x86_umask_t intel_gnr_ocr[]={ .ucode = 0x3fbfc0000100ull, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "DEMAND_DATA_RD_REMOTE_CACHE_SNOOP_HITM", + .udesc = "Counts demand data reads that were supplied by a cache on a remote socket where a snoop hit a modified line in another core's caches which forwarded the data.", + .ucode = 0x103000000100ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "DEMAND_DATA_RD_REMOTE_CACHE_SNOOP_HIT_WITH_FWD", + .udesc = "Counts demand data reads that were supplied by a cache on a remote socket where a snoop hit in another core's caches which forwarded the unmodified data to the requesting core.", + .ucode = 0x83000000100ull, + 
.uflags = INTEL_X86_NCOMBO, + }, + { .uname = "DEMAND_RFO_ANY_RESPONSE", + .udesc = "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + .ucode = 0x3f3ffc000200ull, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "DEMAND_RFO_DRAM", .udesc = "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by DRAM.", .ucode = 0x73c00000200ull, @@ -1652,16 +1673,80 @@ static const intel_x86_umask_t intel_gnr_ocr[]={ .ucode = 0x3f003c447700ull, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "READS_TO_CORE_L3_HIT_SNOOP_HITM", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that resulted in a snoop hit a modified line in another core's caches which forwarded the data.", + .ucode = 0x10003c447700ull, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "READS_TO_CORE_L3_MISS", .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were not supplied by the local socket's L1, L2, or L3 caches.", .ucode = 0x3f3fc0447700ull, .uflags = INTEL_X86_NCOMBO, }, - { .uname = "STREAMING_WR_ANY_RESPONSE", + { .uname = "READS_TO_CORE_L3_MISS_LOCAL", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were not supplied by the local socket's L1, L2, or L3 caches and the cacheline is homed locally.", + .ucode = 0x3f04c0447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_L3_MISS_LOCAL_SOCKET", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that missed the L3 Cache and were supplied by the local socket (DRAM or PMM), whether or not in Sub NUMA Cluster(SNC) Mode. 
In SNC Mode counts PMM or DRAM accesses that are controlled by the close or distant SNC Cluster. It does not count misses to the L3 which go to Local CXL Type 2 Memory or Local Non DRAM.", + .ucode = 0x70cc0447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_LOCAL_DRAM", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by DRAM attached to this socket, unless in Sub NUMA Cluster(SNC) Mode. In SNC Mode counts only those DRAM accesses that are controlled by the close SNC Cluster.", + .ucode = 0x10400447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_LOCAL_SOCKET_DRAM", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by DRAM attached to this socket, whether or not in Sub NUMA Cluster(SNC) Mode. In SNC Mode counts DRAM accesses that are controlled by the close or distant SNC Cluster.", + .ucode = 0x70c00447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_REMOTE", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were not supplied by the local socket's L1, L2, or L3 caches and were supplied by a remote socket.", + .ucode = 0x3f3300447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_REMOTE_CACHE_SNOOP_FWD", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by a cache on a remote socket where a snoop was sent and data was returned (Modified or Not Modified).", + .ucode = 0x183000447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_REMOTE_CACHE_SNOOP_HITM", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to 
the core caches (L1 or L2) that were supplied by a cache on a remote socket where a snoop hit a modified line in another core's caches which forwarded the data.", + .ucode = 0x103000447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_REMOTE_CACHE_SNOOP_HIT_WITH_FWD", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by a cache on a remote socket where a snoop hit in another core's caches which forwarded the unmodified data to the requesting core.", + .ucode = 0x83000447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_REMOTE_DRAM", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by DRAM attached to another socket.", + .ucode = 0x73000447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "READS_TO_CORE_REMOTE_MEMORY", + .udesc = "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by DRAM or PMM attached to another socket.", + .ucode = 0x73300447700ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "RFO_TO_CORE_L3_HIT_M", + .udesc = "Counts demand reads for ownership (RFO), hardware prefetch RFOs (which bring data to L2), and software prefetches for exclusive ownership (PREFETCHW) that hit to a (M)odified cacheline in the L3 or snoop filter.", + .ucode = 0x1f8004002200ull, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "STREAMING_WR_ANY_RESPONSE", .udesc = "Counts streaming stores that have any type of response.", .ucode = 0x1080000ull, .uflags = INTEL_X86_NCOMBO, }, + { .uname = "WRITE_ESTIMATE_MEMORY", + .udesc = "Counts Demand RFOs, ItoM's, PREFECTHW's, Hardware RFO Prefetches to the L1/L2 and Streaming stores that likely resulted in a store to Memory (DRAM or PMM)", + .ucode = 0xfbff8082200ull, + .uflags = 
INTEL_X86_NCOMBO, + }, }; static const intel_x86_umask_t intel_gnr_offcore_requests[]={ @@ -1756,10 +1841,15 @@ static const intel_x86_umask_t intel_gnr_offcore_requests_outstanding[]={ }; static const intel_x86_umask_t intel_gnr_resource_stalls[]={ + { .uname = "SB", + .udesc = "Cycles stalled due to no store buffers available. (not including draining form sync).", + .ucode = 0x0800ull, + .uflags = INTEL_X86_NCOMBO, + }, { .uname = "SCOREBOARD", .udesc = "Counts cycles where the pipeline is stalled due to serializing operations.", .ucode = 0x0200ull, - .uflags = INTEL_X86_DFL, + .uflags = INTEL_X86_NCOMBO, }, }; @@ -1782,6 +1872,44 @@ static const intel_x86_umask_t intel_gnr_rs[]={ }, }; +static const intel_x86_umask_t intel_gnr_rtm_retired[]={ + { .uname = "ABORTED", + .udesc = "Number of times an RTM execution aborted.", + .ucode = 0x0400ull, + .uflags = INTEL_X86_NCOMBO | INTEL_X86_PEBS, + }, + { .uname = "ABORTED_EVENTS", + .udesc = "Number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt)", + .ucode = 0x8000ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORTED_MEM", + .udesc = "Number of times an RTM execution aborted due to various memory events (e.g. 
read/write capacity and conflicts)", + .ucode = 0x0800ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORTED_MEMTYPE", + .udesc = "Number of times an RTM execution aborted due to incompatible memory type", + .ucode = 0x4000ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORTED_UNFRIENDLY", + .udesc = "Number of times an RTM execution aborted due to HLE-unfriendly instructions", + .ucode = 0x2000ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "COMMIT", + .udesc = "Number of times an RTM execution successfully committed", + .ucode = 0x0200ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "START", + .udesc = "Number of times an RTM execution started.", + .ucode = 0x0100ull, + .uflags = INTEL_X86_NCOMBO, + }, +}; + static const intel_x86_umask_t intel_gnr_sq_misc[]={ { .uname = "BUS_LOCK", .udesc = "Counts bus locks, accounts for cache line split locks and UC locks.", @@ -1851,6 +1979,24 @@ static const intel_x86_umask_t intel_gnr_topdown[]={ }, }; +static const intel_x86_umask_t intel_gnr_tx_mem[]={ + { .uname = "ABORT_CAPACITY_READ", + .udesc = "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional reads", + .ucode = 0x8000ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORT_CAPACITY_WRITE", + .udesc = "Speculatively counts the number of TSX aborts due to a data capacity limitation for transactional writes.", + .ucode = 0x0200ull, + .uflags = INTEL_X86_NCOMBO, + }, + { .uname = "ABORT_CONFLICT", + .udesc = "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + .ucode = 0x0100ull, + .uflags = INTEL_X86_NCOMBO, + }, +}; + static const intel_x86_umask_t intel_gnr_uops_decoded[]={ { .uname = "DEC0_UOPS", .udesc = "Number of non dec-by-all uops decoded by decoder", @@ -2615,6 +2761,16 @@ static const intel_x86_entry_t intel_gnr_pe[]={ .numasks= LIBPFM_ARRAY_SIZE(intel_gnr_rs), .umasks = intel_gnr_rs, }, + { .name = "RTM_RETIRED", + .desc = 
"Number of times an RTM execution started.", + .code = 0x00c9, + .modmsk = INTEL_V5_ATTRS, + .cntmsk = 0xffull, + .ngrp = 1, + .flags = INTEL_X86_PEBS, + .numasks= LIBPFM_ARRAY_SIZE(intel_gnr_rtm_retired), + .umasks = intel_gnr_rtm_retired, + }, { .name = "SQ_MISC", .desc = "Miscellaneous SQ activity.", .code = 0x002c, @@ -2645,6 +2801,16 @@ static const intel_x86_entry_t intel_gnr_pe[]={ .numasks= LIBPFM_ARRAY_SIZE(intel_gnr_topdown), .umasks = intel_gnr_topdown, }, + { .name = "TX_MEM", + .desc = "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + .code = 0x0054, + .modmsk = INTEL_V5_ATTRS, + .cntmsk = 0xfull, + .ngrp = 1, + .flags = INTEL_X86_SPEC, + .numasks= LIBPFM_ARRAY_SIZE(intel_gnr_tx_mem), + .umasks = intel_gnr_tx_mem, + }, { .name = "UOPS_DECODED", .desc = "Uops decoded.", .code = 0x0076, @@ -2705,4 +2871,4 @@ static const intel_x86_entry_t intel_gnr_pe[]={ .umasks = intel_gnr_xq, }, }; -/* 65 events available */ +/* 67 events available */