From 942d7be7e92689d98a2c027b71fedf47b49f709c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 7 Feb 2024 14:42:02 +0100 Subject: [PATCH] redo benchmarks for qdp --- qdp_project/evaluation-results/perf.svg | 1564 ++++++++++------- ...tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv | 10 +- ...cb0-tcj1-tmul8-wl4294967296-cs16777216.csv | 6 - ...cb1-tcj1-tmul8-wl4294967296-cs16777216.csv | 10 +- ...cb1-tcj1-tmul8-wl4294967296-cs16777216.csv | 10 +- 5 files changed, 939 insertions(+), 661 deletions(-) delete mode 100644 qdp_project/evaluation-results/qdp-xeonmax-prefetch-aggrj-tca2-tcb0-tcj1-tmul8-wl4294967296-cs16777216.csv diff --git a/qdp_project/evaluation-results/perf.svg b/qdp_project/evaluation-results/perf.svg index a5bbe55..afa37ef 100644 --- a/qdp_project/evaluation-results/perf.svg +++ b/qdp_project/evaluation-results/perf.svg @@ -1,6 +1,6 @@ - + @@ -421,1285 +421,1569 @@ } ]]> - + Flame Graph - + Reset Zoom Search ic - + -[[kernel.kallsyms]] (44,499,432 samples, 0.23%) - +[[kernel.kallsyms]] (1,094,452 samples, 0.01%) + -__libc_fork (2,891,036 samples, 0.02%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (2,440,610 samples, 0.01%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[[kernel.kallsyms]] (1,737,042 samples, 0.01%) - +[[kernel.kallsyms]] (1,662,976 samples, 0.01%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +[[kernel.kallsyms]] (18,810,377 samples, 0.15%) + -__GI___libc_read (3,390,306 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -arch_fork (3,294,563 samples, 0.02%) - +Vector_Loader<unsigned long, (279,005,046 samples, 2.18%) +V.. -[[kernel.kallsyms]] (2,010,420 samples, 0.01%) - +dl_open_worker_begin (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (2,522,592 samples, 0.01%) - +[[kernel.kallsyms]] (1,562,656 samples, 0.01%) + -[libstdc++.so.6.0.32] (2,954,658,675 samples, 15.45%) -[libstdc++.so.6.0.32] +[[kernel.kallsyms]] (5,959,122 samples, 0.05%) + -[[kernel.kallsyms]] (4,339,346 samples, 0.02%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[unknown] (7,524,590,403 samples, 39.35%) -[unknown] +[[kernel.kallsyms]] (1,832,823,392 samples, 14.33%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (27,425,987 samples, 0.14%) - +[[kernel.kallsyms]] (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (3,145,217 samples, 0.02%) - +__GI___clock_gettime (141,009,347 samples, 1.10%) + -[libstdc++.so.6.0.32] (22,414,514 samples, 0.12%) - +_int_malloc (2,585,739 samples, 0.02%) + -[QDPBench] (2,954,658,675 samples, 15.45%) -[QDPBench] +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -__libc_fork (3,294,563 samples, 0.02%) - +_dl_catch_error (1,262,425 samples, 0.01%) + -sudo (24,089,407 samples, 0.13%) - +clock_gettime@plt (24,322,123 samples, 0.19%) + -[[kernel.kallsyms]] (3,294,563 samples, 0.02%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -[[kernel.kallsyms]] (6,080,995 samples, 0.03%) - +devices_init (5,339,194 samples, 0.04%) + -[[kernel.kallsyms]] (3,386,595 samples, 0.02%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[dash] (7,306,765 samples, 0.04%) - +std::chrono::_V2::steady_clock::now (84,598,147 samples, 0.66%) + -[[kernel.kallsyms]] (4,009,567 samples, 0.02%) - +_IO_new_file_underflow (3,448,158 samples, 0.03%) + -[[kernel.kallsyms]] (3,294,563 samples, 0.02%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (26,632,596 samples, 0.14%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -std::chrono::_V2::steady_clock::now (68,929,082 samples, 0.36%) - +dsacache::Cache::Access (1,735,833 samples, 0.01%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +dml::core::dispatcher::hw_dispatcher::get_instance (11,911,549 samples, 0.09%) + -[[kernel.kallsyms]] (1,705,251 samples, 0.01%) - +std::barrier<NopStruct>::wait (2,605,852 samples, 0.02%) + -[[kernel.kallsyms]] (1,731,013 samples, 0.01%) - +dsacache::Cache::AllocOnNode (5,157,234 samples, 0.04%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +dml::detail::ml::impl::hardware::submit (11,911,549 samples, 0.09%) + -[[kernel.kallsyms]] (3,386,595 samples, 0.02%) - +[unknown] (479,146,399 samples, 3.75%) +[unk.. -[[kernel.kallsyms]] (3,766,689 samples, 0.02%) - +[[kernel.kallsyms]] (1,094,452 samples, 0.01%) + -__GI__Fork (2,457,353 samples, 0.01%) - +scan_b (46,025,734 samples, 0.36%) + -[[kernel.kallsyms]] (1,904,055,605 samples, 9.96%) -[[kernel.kalls.. +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + -[[kernel.kallsyms]] (4,702,672 samples, 0.02%) - +dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (40,290,223 samples, 0.32%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +read (1,775,776 samples, 0.01%) + -[[kernel.kallsyms]] (5,574,118 samples, 0.03%) - +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -[[kernel.kallsyms]] (6,080,995 samples, 0.03%) - +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +[[kernel.kallsyms]] (15,448,011 samples, 0.12%) + -dlopen_doit (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + -[sudoers.so] (2,495,463 samples, 0.01%) - +[[kernel.kallsyms]] (1,273,668,574 samples, 9.96%) +[[kernel.kalls.. -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +__GI__IO_doallocbuf (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (1,780,380 samples, 0.01%) - +Vector_Loader<unsigned long, (201,075,858 samples, 1.57%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +[[kernel.kallsyms]] (1,094,452 samples, 0.01%) + -[[kernel.kallsyms]] (3,766,689 samples, 0.02%) - +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -_dl_sysdep_start (1,881,846 samples, 0.01%) - +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (2,293,644 samples, 0.01%) - +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +add_wq (8,897,243 samples, 0.07%) + -[[kernel.kallsyms]] (2,010,420 samples, 0.01%) - +[[kernel.kallsyms]] (1,662,976 samples, 0.01%) + -[sudo] (11,228,095 samples, 0.06%) - +[[kernel.kallsyms]] (1,739,978 samples, 0.01%) + -[[kernel.kallsyms]] (18,146,072 samples, 0.09%) - +QDPBench (12,788,400,850 samples, 100.00%) +QDPBench -[[kernel.kallsyms]] (21,470,187 samples, 0.11%) - +[[kernel.kallsyms]] (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (18,006,589 samples, 0.09%) - +[[kernel.kallsyms]] (1,832,823,392 samples, 14.33%) +[[kernel.kallsyms]] -_dlerror_run (1,669,717 samples, 0.01%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (407,167,062 samples, 3.18%) +std.. -[[kernel.kallsyms]] (2,522,592 samples, 0.01%) - +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (1,737,042 samples, 0.01%) - +void std::__detail::__platform_wait<int> (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (1,944,872,025 samples, 15.21%) +unsigned long std::unif.. -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +[[kernel.kallsyms]] (1,830,230,761 samples, 14.31%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (1,677,612 samples, 0.01%) - +sysmalloc (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +[[kernel.kallsyms]] (1,662,976 samples, 0.01%) + -[[kernel.kallsyms]] (1,905,789,850 samples, 9.97%) -[[kernel.kalls.. +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (7,679,166 samples, 0.04%) - +[[kernel.kallsyms]] (17,941,270 samples, 0.14%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +sysmalloc (28,378,674 samples, 0.22%) + -[dash] (7,306,765 samples, 0.04%) - +[[kernel.kallsyms]] (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (2,457,353 samples, 0.01%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +[[kernel.kallsyms]] (5,089,546 samples, 0.04%) + -[libstdc++.so.6.0.32] (74,345,703 samples, 0.39%) - +groups_init (3,101,168 samples, 0.02%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +std::__detail::__waiter_pool::_M_do_wait (1,735,727 samples, 0.01%) + -[unknown] (5,895,072 samples, 0.03%) - +dl_main (2,239,586 samples, 0.02%) + -[libstdc++.so.6.0.32] (30,688,312 samples, 0.16%) - +[[kernel.kallsyms]] (1,724,889 samples, 0.01%) + -[[kernel.kallsyms]] (2,955,258 samples, 0.02%) - +[[kernel.kallsyms]] (1,724,889 samples, 0.01%) + -[[kernel.kallsyms]] (2,522,592 samples, 0.01%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -__GI_munmap (19,010,115 samples, 0.10%) - +device_parse (3,101,168 samples, 0.02%) + -[[kernel.kallsyms]] (4,339,346 samples, 0.02%) - +[[stack]] (319,807,366 samples, 2.50%) +[[.. -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +std::thread::_M_start_thread (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (2,522,592 samples, 0.01%) - +[[kernel.kallsyms]] (3,733,433 samples, 0.03%) + -[[kernel.kallsyms]] (7,679,166 samples, 0.04%) - +__sysfs_device_parse (3,101,168 samples, 0.02%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +[[kernel.kallsyms]] (17,084,810 samples, 0.13%) + -[[kernel.kallsyms]] (5,574,118 samples, 0.03%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (16,282,930 samples, 0.09%) - +[[kernel.kallsyms]] (5,089,546 samples, 0.04%) + -[[kernel.kallsyms]] (4,009,567 samples, 0.02%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (20,608,832 samples, 0.11%) - +__libc_start_main_impl (4,778,433,373 samples, 37.37%) +__libc_start_main_impl -[libpam.so.0.85.1] (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (2,882,528 samples, 0.02%) - +syscall (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +_mm512_cmplt_epi64_mask (3,470,584 samples, 0.03%) + -[dash] (7,306,765 samples, 0.04%) - +[[kernel.kallsyms]] (1,094,452 samples, 0.01%) + -[[kernel.kallsyms]] (2,457,353 samples, 0.01%) - +advise_stack_range (1,737,656 samples, 0.01%) + -_dl_start_final (1,881,846 samples, 0.01%) - +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -[[kernel.kallsyms]] (44,499,432 samples, 0.23%) - +void std::__detail::__waiter<std::integral_constant<bool, true> >::_M_do_wait<std::__tree_barrier<NopStruct>::wait (3,474,594 samples, 0.03%) + -[[kernel.kallsyms]] (2,558,148 samples, 0.01%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -__GI__Fork (2,571,689 samples, 0.01%) - +dsacache::Cache::SubmitTask (11,911,549 samples, 0.09%) + -___dlopen (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (24,927,309 samples, 0.19%) + -__libc_start_main_impl (12,997,262 samples, 0.07%) - +__GI_mprotect (25,788,811 samples, 0.20%) + -[[kernel.kallsyms]] (2,457,353 samples, 0.01%) - +Filter<unsigned long, LT, (3,282,302,648 samples, 25.67%) +Filter<unsigned long, LT, -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +_int_memalign (28,378,674 samples, 0.22%) + -__libc_openat64 (5,722,742 samples, 0.03%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (2,210,412,779 samples, 17.28%) +unsigned long std::uniform.. -[[kernel.kallsyms]] (3,386,595 samples, 0.02%) - +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + -numactl (2,597,375 samples, 0.01%) - +dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (28,378,674 samples, 0.22%) + -[[kernel.kallsyms]] (5,574,118 samples, 0.03%) - +dsacache::Cache::GetCacheNode (1,735,833 samples, 0.01%) + -[[kernel.kallsyms]] (7,679,166 samples, 0.04%) - +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -dlopen_implementation (1,669,717 samples, 0.01%) - +void std::__atomic_wait_address<std::__barrier_phase_t, std::__tree_barrier<NopStruct>::wait (3,474,594 samples, 0.03%) + -_start (1,881,846 samples, 0.01%) - +[[kernel.kallsyms]] (1,508,809,254 samples, 11.80%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (4,493,208 samples, 0.02%) - +[[kernel.kallsyms]] (6,948,430 samples, 0.05%) + -[[kernel.kallsyms]] (2,499,962 samples, 0.01%) - +[[kernel.kallsyms]] (1,148,662 samples, 0.01%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +_dl_load_cache_lookup (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (40,290,223 samples, 0.32%) + -[dash] (1,684,986 samples, 0.01%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[QDPBench] (241,769,126 samples, 1.26%) - +[[kernel.kallsyms]] (1,662,976 samples, 0.01%) + -[[kernel.kallsyms]] (44,499,432 samples, 0.23%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -[libpam.so.0.85.1] (1,669,717 samples, 0.01%) - +syscall (1,737,164 samples, 0.01%) + -[sudo] (12,997,262 samples, 0.07%) - +[[kernel.kallsyms]] (17,941,270 samples, 0.14%) + -_int_malloc (1,813,649 samples, 0.01%) - +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +_dl_sysdep_read_whole_file (1,262,425 samples, 0.01%) + -_dl_open (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (1,725,071 samples, 0.01%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[anon] (409,735,084 samples, 2.14%) -[.. +[[kernel.kallsyms]] (4,158,226 samples, 0.03%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +__GI___libc_read (1,775,776 samples, 0.01%) + -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +operator new (28,378,674 samples, 0.22%) + -[[kernel.kallsyms]] (41,985,308 samples, 0.22%) - +std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<long, std::ratio<1l, 1000000000l>, long, std::ratio<1l, 1000000000l> > (3,498,307 samples, 0.03%) + -syscall (5,574,118 samples, 0.03%) - +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (1,295,771,478 samples, 10.13%) +unsigned int s.. -date (3,218,439 samples, 0.02%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (2,571,689 samples, 0.01%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (22,340,223 samples, 0.12%) - +[[kernel.kallsyms]] (5,959,122 samples, 0.05%) + -QDPBench (19,080,136,302 samples, 99.79%) -QDPBench +syscall (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (3,281,568 samples, 0.02%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[[kernel.kallsyms]] (22,340,223 samples, 0.12%) - +[[kernel.kallsyms]] (10,320,932 samples, 0.08%) + -__libc_start_main_impl (7,306,765 samples, 0.04%) - +void caching<1ul> (45,447,457 samples, 0.36%) + -[[kernel.kallsyms]] (44,499,432 samples, 0.23%) - +__libc_start_call_main (4,778,433,373 samples, 37.37%) +__libc_start_call_main -clock_gettime@plt (32,690,246 samples, 0.17%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[QDPBench] (276,448,453 samples, 1.45%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[[kernel.kallsyms]] (1,904,922,585 samples, 9.96%) -[[kernel.kalls.. +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (1,777,927 samples, 0.01%) - +__GI_mprotect (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +std::__detail::__waiter_base<std::__detail::__waiter_pool>::_M_notify (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (2,955,258 samples, 0.02%) - +[[kernel.kallsyms]] (1,148,662 samples, 0.01%) + -arch_fork (2,571,689 samples, 0.01%) - +[[kernel.kallsyms]] (1,724,889 samples, 0.01%) + -__GI___clock_gettime (22,414,514 samples, 0.12%) - +__GI___getdelim (3,448,158 samples, 0.03%) + -__GI___libc_read (3,390,306 samples, 0.02%) - +__GI_munmap (10,426,718 samples, 0.08%) + -[sudoers.so] (2,495,463 samples, 0.01%) - +std::__atomic_ref<std::__barrier_phase_t, false, false>::notify_all (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (1,737,042 samples, 0.01%) - +[[kernel.kallsyms]] (1,106,913 samples, 0.01%) + -[sudoers.so] (2,495,463 samples, 0.01%) - +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -[[kernel.kallsyms]] (1,731,013 samples, 0.01%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -__GI___clock_gettime (30,688,312 samples, 0.16%) - +dml::core::dispatcher::hw_dispatcher::initialize_hw (11,911,549 samples, 0.09%) + -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (5,574,118 samples, 0.03%) - +[[kernel.kallsyms]] (17,207,204 samples, 0.13%) + -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +dlopen_doit (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (26,632,596 samples, 0.14%) - +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (44,499,432 samples, 0.23%) - +void std::__detail::__waiter<std::integral_constant<bool, true> >::_M_do_wait<std::__tree_barrier<NopStruct>::wait (2,605,852 samples, 0.02%) + -[[kernel.kallsyms]] (1,554,115,900 samples, 8.13%) -[[kernel.ka.. +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (12,962,126 samples, 0.10%) + -[[kernel.kallsyms]] (3,386,595 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[sudo] (11,228,095 samples, 0.06%) - +[[kernel.kallsyms]] (1,832,823,392 samples, 14.33%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (3,386,595 samples, 0.02%) - +__GI_sched_yield (1,737,314 samples, 0.01%) + -sudo_debug_fork_v1 (2,941,457 samples, 0.02%) - +LT<unsigned long>::simd_filter (3,470,584 samples, 0.03%) + -[[kernel.kallsyms]] (4,339,346 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -sh (10,190,112 samples, 0.05%) - +accfg_get_param_long (2,238,026 samples, 0.02%) + -[[kernel.kallsyms]] (3,294,563 samples, 0.02%) - +void std::__detail::__platform_notify<int> (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (2,440,610 samples, 0.01%) - +__sysfs_read_attr (1,751,881 samples, 0.01%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + -[[kernel.kallsyms]] (4,009,567 samples, 0.02%) - +std::thread::thread<void (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (22,340,223 samples, 0.12%) - +scan_a (3,288,832,947 samples, 25.72%) +scan_a -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +__GI__dl_catch_exception (1,262,425 samples, 0.01%) + -[dash] (7,306,765 samples, 0.04%) - +dml::core::hardware_device::submit (11,911,549 samples, 0.09%) + -[[kernel.kallsyms]] (4,702,672 samples, 0.02%) - +clone3 (3,654,081,491 samples, 28.57%) +clone3 -sudo_debug_fork_v1 (2,891,036 samples, 0.02%) - +[[kernel.kallsyms]] (1,562,656 samples, 0.01%) + -[[kernel.kallsyms]] (1,705,251 samples, 0.01%) - +openat (1,143,574 samples, 0.01%) + -syscall (1,737,042 samples, 0.01%) - +dml::detail::ml::task<std::allocator<unsigned char> >::task (28,378,674 samples, 0.22%) + -start_thread (2,954,658,675 samples, 15.45%) -start_thread +dml::core::dispatcher::hw_dispatcher::hw_dispatcher (11,911,549 samples, 0.09%) + -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -std::chrono::_V2::steady_clock::now (33,709,335 samples, 0.18%) - +Sum<unsigned long>::simd_agg (18,513,417 samples, 0.14%) + -[[kernel.kallsyms]] (18,006,589 samples, 0.09%) - +std::barrier<NopStruct>::arrive_and_wait (6,080,084 samples, 0.05%) + -[[kernel.kallsyms]] (22,340,223 samples, 0.12%) - +[[kernel.kallsyms]] (1,830,230,761 samples, 14.31%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +[[kernel.kallsyms]] (17,941,270 samples, 0.14%) + -[libpam.so.0.85.1] (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -[[kernel.kallsyms]] (3,264,047 samples, 0.02%) - +clock_gettime@plt (13,672,702 samples, 0.11%) + -_dl_start (1,881,846 samples, 0.01%) - +std::barrier<NopStruct>::arrive_and_wait (2,605,852 samples, 0.02%) + -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +sum_check (352,075,561 samples, 2.75%) +su.. -__libc_open64 (4,009,567 samples, 0.02%) - +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -[[kernel.kallsyms]] (2,571,689 samples, 0.01%) - +std::barrier<NopStruct>::arrive (2,605,490 samples, 0.02%) + -[[kernel.kallsyms]] (4,702,672 samples, 0.02%) - +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (6,080,995 samples, 0.03%) - +[[kernel.kallsyms]] (17,941,270 samples, 0.14%) + -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -[[kernel.kallsyms]] (1,737,042 samples, 0.01%) - +[[kernel.kallsyms]] (5,959,122 samples, 0.05%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (5,574,118 samples, 0.03%) - +dlopen_implementation (1,262,425 samples, 0.01%) + -all (19,120,262,162 samples, 100%) - +[[kernel.kallsyms]] (1,086,257 samples, 0.01%) + -[QDPBench] (1,528,702,105 samples, 8.00%) -[QDPBench] +_mid_memalign (28,378,674 samples, 0.22%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +device_parse (8,897,243 samples, 0.07%) + -[QDPBench] (2,915,385,788 samples, 15.25%) -[QDPBench] +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -[[kernel.kallsyms]] (3,145,217 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (4,009,567 samples, 0.02%) - +void fill_mt<unsigned long> (4,401,975,708 samples, 34.42%) +void fill_mt<unsigned long> -[[kernel.kallsyms]] (2,599,464 samples, 0.01%) - +dl_open_worker (1,262,425 samples, 0.01%) + -[[vdso]] (3,385,383,119 samples, 17.71%) -[[vdso]] +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (28,378,674 samples, 0.22%) + -[[kernel.kallsyms]] (2,571,689 samples, 0.01%) - +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -__GI_munmap (3,386,595 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (3,264,047 samples, 0.02%) - +[anon] (154,796,010 samples, 1.21%) + -[sudo] (7,482,832 samples, 0.04%) - +__GI_mprotect (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (1,555,844,455 samples, 8.14%) -[[kernel.ka.. +void std::__atomic_impl::notify_all<std::__barrier_phase_t> (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +main (4,776,770,397 samples, 37.35%) +main -[[kernel.kallsyms]] (2,955,258 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[sudoers.so] (2,495,463 samples, 0.01%) - +_dl_check_all_versions (1,132,673 samples, 0.01%) + -[[kernel.kallsyms]] (2,596,755 samples, 0.01%) - +[[kernel.kallsyms]] (1,810,367,544 samples, 14.16%) +[[kernel.kallsyms]] -[sudo] (4,243,346 samples, 0.02%) - +__GI_exit (1,662,976 samples, 0.01%) + -[[kernel.kallsyms]] (10,367,760 samples, 0.05%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (2,273,418 samples, 0.01%) - +void std::__atomic_notify_address<std::__barrier_phase_t> (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (5,722,742 samples, 0.03%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[[kernel.kallsyms]] (1,731,499 samples, 0.01%) - +__sysfs_device_parse (2,238,026 samples, 0.02%) + -[[kernel.kallsyms]] (19,743,301 samples, 0.10%) - +[[kernel.kallsyms]] (1,562,656 samples, 0.01%) + -[[kernel.kallsyms]] (2,558,148 samples, 0.01%) - +[[kernel.kallsyms]] (5,959,122 samples, 0.05%) + -__GI_mprotect (44,499,432 samples, 0.23%) - +[[kernel.kallsyms]] (13,817,985 samples, 0.11%) + -[[kernel.kallsyms]] (4,009,567 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (1,737,042 samples, 0.01%) - +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + -[dash] (4,979,549 samples, 0.03%) - +_int_malloc (28,378,674 samples, 0.22%) + -[[kernel.kallsyms]] (16,418,309 samples, 0.09%) - +std::chrono::_V2::steady_clock::now (97,534,503 samples, 0.76%) + -[[kernel.kallsyms]] (4,702,672 samples, 0.02%) - +std::__tree_barrier<NopStruct>::wait (2,605,852 samples, 0.02%) + -__GI__dl_catch_exception (1,669,717 samples, 0.01%) - +_dl_check_map_versions (1,132,673 samples, 0.01%) + -dl_open_worker (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (1,662,976 samples, 0.01%) + -[[kernel.kallsyms]] (2,596,755 samples, 0.01%) - +std::__detail::__waiter_pool_base::_M_notify (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (2,596,755 samples, 0.01%) - +dml::core::dispatcher::hw_dispatcher::~hw_dispatcher (1,662,976 samples, 0.01%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (3,294,563 samples, 0.02%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[QDPBench] (7,414,426,768 samples, 38.78%) -[QDPBench] +[[kernel.kallsyms]] (11,267,710 samples, 0.09%) + -[[kernel.kallsyms]] (1,905,789,850 samples, 9.97%) -[[kernel.kalls.. +__libc_openat64 (1,143,574 samples, 0.01%) + -clone3 (4,326,736 samples, 0.02%) - +dsacache::Cache::SubmitTask (45,447,457 samples, 0.36%) + -[[kernel.kallsyms]] (4,702,672 samples, 0.02%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -[[kernel.kallsyms]] (4,339,346 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (4,326,736 samples, 0.02%) - +__libc_openat64 (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +__GI_madvise (1,737,656 samples, 0.01%) + -__GI___mmap64 (7,679,166 samples, 0.04%) - +dsacache::Cache::ExecuteCopy (40,290,223 samples, 0.32%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +version_check_doit (1,132,673 samples, 0.01%) + -[dash] (6,200,871 samples, 0.03%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (306,053,484 samples, 2.39%) +u.. -[[kernel.kallsyms]] (7,679,166 samples, 0.04%) - +dml::core::dispatcher::hw_device::initialize_new_device (10,649,124 samples, 0.08%) + -__GI_madvise (3,394,767 samples, 0.02%) - +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (1,904,922,585 samples, 9.96%) -[[kernel.kalls.. +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + -[[kernel.kallsyms]] (2,499,962 samples, 0.01%) - +__GI__dl_catch_exception (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +_mm512_stream_load_si512 (279,005,046 samples, 2.18%) +_.. -[sudo] (12,997,262 samples, 0.07%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -[[kernel.kallsyms]] (2,955,258 samples, 0.02%) - +dsacache::CacheData::WaitOnCompletion (11,295,967 samples, 0.09%) + -[[kernel.kallsyms]] (2,955,258 samples, 0.02%) - +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + -[[kernel.kallsyms]] (3,386,595 samples, 0.02%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[libpam.so.0.85.1] (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (1,255,080,269 samples, 6.56%) -[[kernel.. +_dlerror_run (1,262,425 samples, 0.01%) + -std::chrono::_V2::steady_clock::now (46,877,413 samples, 0.25%) - +__GI_munmap (1,662,976 samples, 0.01%) + -[libstdc++.so.6.0.32] (62,206,438 samples, 0.33%) - +[[kernel.kallsyms]] (1,724,282 samples, 0.01%) + -[dash] (7,306,765 samples, 0.04%) - +[[vdso]] (3,393,286,810 samples, 26.53%) +[[vdso]] -[[kernel.kallsyms]] (43,697,155 samples, 0.23%) - +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + -[sudoers.so] (2,495,463 samples, 0.01%) - +__pthread_create_2_1 (2,587,208 samples, 0.02%) + -[dash] (6,200,871 samples, 0.03%) - +[[kernel.kallsyms]] (1,562,656 samples, 0.01%) + -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +std::__detail::__thread_yield (1,737,314 samples, 0.01%) + -clock_gettime@plt (23,445,898 samples, 0.12%) - +_dl_receive_error (1,132,673 samples, 0.01%) + + + +[[kernel.kallsyms]] (1,725,071 samples, 0.01%) + + + +_dl_protect_relro (1,106,913 samples, 0.01%) + + + +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + + + +unsigned long std::uniform_int_distribution<unsigned long>::operator (319,807,366 samples, 2.50%) +un.. + + +[[kernel.kallsyms]] (18,810,377 samples, 0.15%) + + + +__GI_munmap (18,068,150 samples, 0.14%) + + + +[[kernel.kallsyms]] (1,094,452 samples, 0.01%) + + + +[[kernel.kallsyms]] (1,143,574 samples, 0.01%) + + + +dml::core::dispatcher::hw_queue::initialize_new_queue (1,751,881 samples, 0.01%) + + + +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + + + +__mmap64 (1,262,425 samples, 0.01%) + + + +accfg_get_param_long (8,019,769 samples, 0.06%) + + + +grow_heap (2,585,739 samples, 0.02%) + + + +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + + + +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + + + +__run_exit_handlers (1,662,976 samples, 0.01%) + + + +__sysfs_device_parse (8,897,243 samples, 0.07%) + + + +__GI__IO_file_doallocate (2,585,739 samples, 0.02%) + + + +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + + + +[libstdc++.so.6.0.32] (3,648,891,641 samples, 28.53%) +[libstdc++.so.6.0.32] + + +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + + + +grow_heap (25,788,811 samples, 0.20%) + + + +bool std::__detail::__waiter_base<std::__detail::__waiter_pool>::_M_do_spin<std::__tree_barrier<NopStruct>::wait (1,737,314 samples, 0.01%) + + + +std::chrono::_V2::steady_clock::now (48,250,590 samples, 0.38%) + + + +[[kernel.kallsyms]] (25,788,811 samples, 0.20%) + + + +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + + + +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + + + +[[kernel.kallsyms]] (1,833,688,222 samples, 14.34%) +[[kernel.kallsyms]] + + +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + + + +[[kernel.kallsyms]] (5,210,636 samples, 0.04%) + + + +openat (6,243,993 samples, 0.05%) + + + +std::__tree_barrier<NopStruct>::arrive (2,605,490 samples, 0.02%) + + + +[[kernel.kallsyms]] (2,496,020 samples, 0.02%) + + + +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (248,928,848 samples, 1.95%) +u.. + + +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (28,378,674 samples, 0.22%) + + + +void std::__atomic_wait_address<std::__barrier_phase_t, std::__tree_barrier<NopStruct>::wait (2,605,852 samples, 0.02%) + + + +[[kernel.kallsyms]] (3,360,039 samples, 0.03%) + + + +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + + + +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + + + +[[kernel.kallsyms]] (12,950,787 samples, 0.10%) + + + +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + + + +[[kernel.kallsyms]] (2,496,020 samples, 0.02%) + + + +accfg_wq_get_state (1,751,881 samples, 0.01%) + + + +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + + + +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + + + +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (1,000,135,128 samples, 7.82%) +std::mersen.. + + +[[kernel.kallsyms]] (5,210,636 samples, 0.04%) + + + +[[kernel.kallsyms]] (1,662,976 samples, 0.01%) + + + +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + + + +[[kernel.kallsyms]] (18,068,150 samples, 0.14%) + + + +allocate_stack (2,587,208 samples, 0.02%) + + + +dsacache::Cache::Access (45,447,457 samples, 0.36%) + + + +_dl_start_final (2,239,586 samples, 0.02%) + + + +numa_node_size64 (5,157,234 samples, 0.04%) + + + +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + + + +[[kernel.kallsyms]] (6,243,993 samples, 0.05%) + + + +bool std::__detail::__atomic_spin<std::__tree_barrier<NopStruct>::wait (1,737,314 samples, 0.01%) + + + +[[kernel.kallsyms]] (8,686,189 samples, 0.07%) + + + +__mmap64 (1,262,425 samples, 0.01%) + + + +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + + + +[[kernel.kallsyms]] (1,562,656 samples, 0.01%) + + + +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (28,378,674 samples, 0.22%) + + + +add_group (3,101,168 samples, 0.02%) + + + +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + + + +__GI__dl_catch_exception (1,262,425 samples, 0.01%) + + + +__GI___clock_gettime (70,219,218 samples, 0.55%) + -__GI_munmap (6,948,349 samples, 0.04%) - +std::barrier<NopStruct>::wait (3,474,594 samples, 0.03%) + -[sudo] (10,424,289 samples, 0.05%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -[[stack]] (1,529,567,888 samples, 8.00%) -[[stack]] +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -__libc_start_call_main (7,306,765 samples, 0.04%) - +strncmp (1,132,673 samples, 0.01%) + -[libpam.so.0.85.1] (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (22,340,223 samples, 0.12%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (7,679,166 samples, 0.04%) - +add_device (2,238,026 samples, 0.02%) + -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -[[kernel.kallsyms]] (1,732,493 samples, 0.01%) - +all (12,788,431,143 samples, 100%) + -_dl_catch_error (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (1,904,922,585 samples, 9.96%) -[[kernel.kalls.. +_start (4,781,706,256 samples, 37.39%) +_start -[[kernel.kallsyms]] (1,890,194,856 samples, 9.89%) -[[kernel.kalls.. +_mm512_mask_add_epi64 (18,513,417 samples, 0.14%) + -[[kernel.kallsyms]] (2,955,258 samples, 0.02%) - +_mm512_stream_load_si512 (201,075,858 samples, 1.57%) + -[[kernel.kallsyms]] (4,009,567 samples, 0.02%) - +_dl_sysdep_start (2,239,586 samples, 0.02%) + -arch_fork (2,457,353 samples, 0.01%) - +std::thread& std::vector<std::thread, std::allocator<std::thread> >::emplace_back<void (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +wqs_init (8,897,243 samples, 0.07%) + -std::chrono::_V2::steady_clock::now (71,080,193 samples, 0.37%) - +aggr_j (313,529,084 samples, 2.45%) +ag.. -[[kernel.kallsyms]] (3,394,767 samples, 0.02%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +[[kernel.kallsyms]] (1,833,688,222 samples, 14.34%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (2,440,610 samples, 0.01%) - +_dl_start (2,239,586 samples, 0.02%) + -[[kernel.kallsyms]] (1,635,518 samples, 0.01%) - +std::__detail::__waiter_pool::_M_do_wait (1,737,280 samples, 0.01%) + -dl_main (1,881,846 samples, 0.01%) - +std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (3,498,307 samples, 0.03%) + -[[kernel.kallsyms]] (2,457,353 samples, 0.01%) - +_dl_relocate_object (1,106,913 samples, 0.01%) + -[[kernel.kallsyms]] (4,705,531 samples, 0.02%) - +[[kernel.kallsyms]] (1,735,727 samples, 0.01%) + -__GI__dl_catch_exception (1,669,717 samples, 0.01%) - +device_parse (2,238,026 samples, 0.02%) + -[[kernel.kallsyms]] (3,315,227 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,164 samples, 0.01%) + -[[kernel.kallsyms]] (6,948,349 samples, 0.04%) - +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[dash] (7,306,765 samples, 0.04%) - +accfg_get_param_str (2,040,247 samples, 0.02%) + -__GI___clock_gettime (65,660,312 samples, 0.34%) - +[[kernel.kallsyms]] (1,510,538,762 samples, 11.81%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +bool std::__detail::__waiter_base<std::__detail::__waiter_pool>::_S_do_spin<std::__tree_barrier<NopStruct>::wait (1,737,314 samples, 0.01%) + -[[kernel.kallsyms]] (4,705,531 samples, 0.02%) - +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + -[[kernel.kallsyms]] (2,464,297 samples, 0.01%) - +__GI___libc_read (1,775,776 samples, 0.01%) + -[[kernel.kallsyms]] (1,766,715 samples, 0.01%) - +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + -[[kernel.kallsyms]] (2,526,230 samples, 0.01%) - +_dl_open (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (3,145,217 samples, 0.02%) - +__GI__IO_doallocbuf (2,585,739 samples, 0.02%) + -__GI___clock_gettime (62,206,438 samples, 0.33%) - +[[kernel.kallsyms]] (5,959,122 samples, 0.05%) + -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +start_thread (3,650,629,297 samples, 28.55%) +start_thread -[[kernel.kallsyms]] (2,522,592 samples, 0.01%) - +[[kernel.kallsyms]] (3,452,194 samples, 0.03%) + -[libpam.so.0.85.1] (1,669,717 samples, 0.01%) - +auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (28,378,674 samples, 0.22%) + -[[kernel.kallsyms]] (2,571,689 samples, 0.01%) - +[[kernel.kallsyms]] (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + -__GI___mmap64 (7,679,166 samples, 0.04%) - +void std::allocator_traits<std::allocator<std::thread> >::construct<std::thread, void (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +__GI___close (1,094,452 samples, 0.01%) + -__GI___getdents64 (2,955,258 samples, 0.02%) - +void std::__detail::__platform_wait<int> (1,735,727 samples, 0.01%) + -__GI__Fork (3,294,563 samples, 0.02%) - +void std::vector<std::thread, std::allocator<std::thread> >::_M_realloc_insert<void (2,587,208 samples, 0.02%) + -[[kernel.kallsyms]] (3,390,306 samples, 0.02%) - +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (2,522,592 samples, 0.01%) - +accfg_wq_get_first (8,897,243 samples, 0.07%) + -[[kernel.kallsyms]] (7,679,166 samples, 0.04%) - +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (28,378,674 samples, 0.22%) + -__GI__dl_catch_exception (1,669,717 samples, 0.01%) - +[[kernel.kallsyms]] (10,426,718 samples, 0.08%) + -dl_open_worker_begin (1,669,717 samples, 0.01%) - +__GI___libc_malloc (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (3,264,047 samples, 0.02%) - +_dl_map_object (1,262,425 samples, 0.01%) + -__libc_fork (2,941,457 samples, 0.02%) - +[[kernel.kallsyms]] (3,621,354 samples, 0.03%) + -[[kernel.kallsyms]] (5,172,283 samples, 0.03%) - +void std::__atomic_wait_address<std::__barrier_phase_t, std::__tree_barrier<NopStruct>::wait (1,737,314 samples, 0.01%) + -[sudoers.so] (2,495,463 samples, 0.01%) - +[[kernel.kallsyms]] (1,737,656 samples, 0.01%) + -[[kernel.kallsyms]] (1,904,055,605 samples, 9.96%) -[[kernel.kalls.. +Aggregation<unsigned long, Sum, (296,406,305 samples, 2.32%) +A.. -[[kernel.kallsyms]] (2,455,182 samples, 0.01%) - +std::__tree_barrier<NopStruct>::wait (3,474,594 samples, 0.03%) + -__libc_start_call_main (2,934,395,903 samples, 15.35%) -__libc_start_call_main +dsa_initialize_accelerator_driver (1,262,425 samples, 0.01%) + -[[kernel.kallsyms]] (19,010,115 samples, 0.10%) - +[[kernel.kallsyms]] (24,927,309 samples, 0.19%) + + + +[[kernel.kallsyms]] (1,737,280 samples, 0.01%) + + + +decltype (2,587,208 samples, 0.02%) + + + +[[kernel.kallsyms]] (1,562,656 samples, 0.01%) + + + +[[kernel.kallsyms]] (2,585,739 samples, 0.02%) + -[[kernel.kallsyms]] (1,731,499 samples, 0.01%) - +___dlopen (1,262,425 samples, 0.01%) + -__libc_start_call_main (12,997,262 samples, 0.07%) - +[[kernel.kallsyms]] (2,589,863 samples, 0.02%) + diff --git a/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv b/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv index b33ee17..6f9aff0 100644 --- a/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv +++ b/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv @@ -1,6 +1,6 @@ run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;1169517226;1.16952;13289362112;6254;767;1663696;0;0;2666575;622519;1433191;0; -1;1210548349;1.21055;13289362112;6125;1155;1379106;0;0;2951294;520752;1210710;0; -2;1130983794;1.13098;13289362112;6839;384;1313762;0;0;2741965;542631;1323214;0; -3;1434553481;1.43455;13289362112;6108;805;1617857;0;0;3607345;488764;1582563;0; -4;1096362826;1.09636;13289362112;6002;1106;1307258;0;0;2757539;484020;1186513;0; +0;1384070228;1.38407;13289362112;18446744073708121004;910;1437012;0;0;0;441613;1572806;0; +1;1253705586;1.25371;13289362112;18446744073708444589;751;1113377;0;0;0;323023;1297474;0; +2;1424691737;1.42469;13289362112;18446744073708138019;1328;1419632;0;0;0;400037;1557709;0; +3;1329719999;1.32972;13289362112;18446744073708274218;312;1284204;0;0;0;392582;1317678;0; +4;1250425103;1.25043;13289362112;18446744073708047114;201;1511398;0;0;0;416545;1451243;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-aggrj-tca2-tcb0-tcj1-tmul8-wl4294967296-cs16777216.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-aggrj-tca2-tcb0-tcj1-tmul8-wl4294967296-cs16777216.csv deleted file mode 100644 index 9165927..0000000 --- a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-aggrj-tca2-tcb0-tcj1-tmul8-wl4294967296-cs16777216.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;1932671803;1.93267;13289362112;752963;1069516;15695554;0;0;53143723;4264839;13750140;0; -1;1896349857;1.89635;13289362112;735056;455584;30878957;0;0;50200678;5438457;21793742;0; -2;2538999696;2.539;13289362112;567524;1321982;16417950;0;0;71928307;3731924;21716361;0; -3;2510985578;2.51099;13289362112;663346;1183415;16645686;0;0;71099401;4008425;15409823;0; -4;2377506902;2.37751;13289362112;1077932;519479;21188438;0;0;58766096;5650539;22650066;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv index a53e15c..b71893b 100644 --- a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv +++ b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv @@ -1,6 +1,6 @@ run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;2203037279;2.20304;13289362112;99157;122246;14142833;16235812;680;59936024;3879840;20147673;0; -1;2160465410;2.16047;13289362112;89143;145557;19131419;16059594;761;58201639;5327352;22929296;0; -2;2556391208;2.55639;13289362112;97301;180667;17570772;15077808;703;74692903;3949064;22146299;0; -3;2695041594;2.69504;13289362112;109739;141424;20546072;16557841;934;78638782;2299348;28857837;0; -4;2712434994;2.71243;13289362112;110164;200009;19790300;16815906;915;81159365;2672159;28090727;0; +0;2536072344;2.53607;13289362112;18446744073697239175;995173;13157651;2188748;112;0;4544721;18447110;0; +1;2165383914;2.16538;13289362112;18446744073686547899;1562266;23639880;2040783;129;0;4083149;28451690;0; +2;2393350274;2.39335;13289362112;18446744073691218761;914891;19038237;2050101;95;0;3989943;23093564;0; +3;2542202609;2.5422;13289362112;18446744073697519601;1370138;12615641;3695908;83;0;4736486;16720786;0; +4;2616699345;2.6167;13289362112;18446744073695083480;1461789;15194072;2172819;141;0;3963663;18705128;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv index 3fd5bd5..a180d11 100644 --- a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv +++ b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv @@ -1,6 +1,6 @@ run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;2551415911;2.55142;13289362112;107363;159226;11816287;14918998;1259;72953905;4125726;15098179;0; -1;2256490884;2.25649;13289362112;94184;91028;27588460;15270661;575;61903000;4103993;23673584;0; -2;2041975719;2.04198;13289362112;149022;15690;19198269;15354530;896;56442770;5437217;18248838;0; -3;2060686329;2.06069;13289362112;156548;12177;22131743;16985942;872;56773867;5488597;17609600;0; -4;2426252756;2.42625;13289362112;88670;169306;21807821;31883140;780;69732636;3012650;21239065;0; +0;2470863547;2.47086;13289362112;18446744073690031357;1196819;20069202;1963658;107;0;3377995;22622836;0; +1;2528668383;2.52867;13289362112;18446744073690542201;1270587;19964463;1958977;95;0;3524605;22578890;0; +2;1995968797;1.99597;13289362112;18446744073689806113;582965;20816720;1956052;93;0;4951653;23045303;0; +3;2015452757;2.01545;13289362112;18446744073684432123;1643622;25889421;1947255;100;0;6225001;19511459;0; +4;2280860370;2.28086;13289362112;18446744073691893576;1347346;18264335;4137877;117;0;4813737;21454208;0;