From 6dd7f80500f69a79fd45ce13de58a9783391a1b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Thu, 25 Jan 2024 17:31:08 +0100 Subject: [PATCH] again, redo the perf flame graph --- .../qdp-xeonmax-simple-prefetch-perf.svg | 2116 ++++++++++++----- 1 file changed, 1468 insertions(+), 648 deletions(-) diff --git a/qdp_project/evaluation-results/qdp-xeonmax-simple-prefetch-perf.svg b/qdp_project/evaluation-results/qdp-xeonmax-simple-prefetch-perf.svg index e736d31..1d0dc86 100644 --- a/qdp_project/evaluation-results/qdp-xeonmax-simple-prefetch-perf.svg +++ b/qdp_project/evaluation-results/qdp-xeonmax-simple-prefetch-perf.svg @@ -1,6 +1,6 @@ - + @@ -421,1301 +421,2121 @@ } ]]> - + Flame Graph - + Reset Zoom Search ic - + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (13,838,204 samples, 0.02%) + -dml_wait_busy_poll (1,966,417,818 samples, 2.08%) -d.. +[[kernel.kallsyms]] (52,582,122 samples, 0.09%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +[[kernel.kallsyms]] (33,581,259 samples, 0.06%) + -[[kernel.kallsyms]] (891,320,615 samples, 0.94%) - +[[kernel.kallsyms]] (8,650,216 samples, 0.02%) + -[[kernel.kallsyms]] (114,816,414 samples, 0.12%) - +[[kernel.kallsyms]] (15,074,171 samples, 0.03%) + -[[kernel.kallsyms]] (8,063,816 samples, 0.01%) - +[[kernel.kallsyms]] (8,457,723 samples, 0.02%) + -[[kernel.kallsyms]] (30,034,265 samples, 0.03%) - +__GI_mprotect (9,972,283 samples, 0.02%) + -void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > (1,966,417,818 samples, 2.08%) -v.. +[[kernel.kallsyms]] (44,111,847 samples, 0.08%) + -[[kernel.kallsyms]] (10,746,861 samples, 0.01%) - +[[kernel.kallsyms]] (5,997,398 samples, 0.01%) + -dsacache::Cache::Clear (1,966,417,818 samples, 2.08%) -d.. +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + -[[kernel.kallsyms]] (1,680,680,697 samples, 1.77%) - +syscall (19,162,146 samples, 0.03%) + -[[kernel.kallsyms]] (13,855,268 samples, 0.01%) - +[[kernel.kallsyms]] (64,992,482 samples, 0.12%) + -[[kernel.kallsyms]] (111,334,229 samples, 0.12%) - +__libc_start_main_impl (47,034,233,602 samples, 83.69%) +__libc_start_main_impl -[[kernel.kallsyms]] (113,945,945 samples, 0.12%) - +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (7,324,068,091 samples, 13.03%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (15,956,913 samples, 0.02%) - +[[kernel.kallsyms]] (14,696,092 samples, 0.03%) + -unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (7,791,369,565 samples, 8.23%) -unsigned in.. +dsacache::CacheData::~CacheData (27,978,244,770 samples, 49.78%) +dsacache::CacheData::~CacheData -[[kernel.kallsyms]] (7,364,969,554 samples, 7.78%) -[[kernel.k.. +unsigned long std::uniform_int_distribution<unsigned long>::operator (1,224,461,441 samples, 2.18%) +u.. -[[kernel.kallsyms]] (9,242,568 samples, 0.01%) - +[[kernel.kallsyms]] (19,939,586 samples, 0.04%) + -[[kernel.kallsyms]] (6,145,999,045 samples, 6.49%) -[[kernel.. +[[kernel.kallsyms]] (78,676,124 samples, 0.14%) + -[[kernel.kallsyms]] (9,447,981 samples, 0.01%) - +auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (52,769,643 samples, 0.09%) + -Filter<unsigned long, LT, (8,703,974,058 samples, 9.19%) -Filter<unsign.. +[[kernel.kallsyms]] (58,263,661 samples, 0.10%) + -[[kernel.kallsyms]] (344,116,366 samples, 0.36%) - +[[kernel.kallsyms]] (9,614,253 samples, 0.02%) + -[[kernel.kallsyms]] (32,926,402 samples, 0.03%) - +dml_wait_busy_poll (27,883,144,753 samples, 49.61%) +dml_wait_busy_poll -void fill_mt<unsigned long> (20,391,957,000 samples, 21.54%) -void fill_mt<unsigned long> +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (10,813,731 samples, 0.02%) + -[[kernel.kallsyms]] (104,828,295 samples, 0.11%) - +[[kernel.kallsyms]] (5,469,680 samples, 0.01%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + -dsacache::CacheData::WaitOnCompletion (1,966,417,818 samples, 2.08%) -d.. +[[kernel.kallsyms]] (92,507,760 samples, 0.16%) + -[[kernel.kallsyms]] (32,615,269 samples, 0.03%) - +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + -[[kernel.kallsyms]] (115,508,253 samples, 0.12%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (8,778,281,021 samples, 15.62%) +unsigned long std::unifo.. -[[kernel.kallsyms]] (30,383,199 samples, 0.03%) - +device_parse (16,212,211 samples, 0.03%) + -[[kernel.kallsyms]] (137,504,974 samples, 0.15%) - +__GI_mprotect (50,154,117 samples, 0.09%) + -[[kernel.kallsyms]] (102,237,912 samples, 0.11%) - +[[kernel.kallsyms]] (6,005,283,229 samples, 10.69%) +[[kernel.kallsy.. -_mm512_stream_load_si512 (254,936,384 samples, 0.27%) - +[[kernel.kallsyms]] (6,835,870 samples, 0.01%) + -[[kernel.kallsyms]] (111,334,229 samples, 0.12%) - +[[kernel.kallsyms]] (7,143,140 samples, 0.01%) + -[[kernel.kallsyms]] (1,669,404,130 samples, 1.76%) - +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + -[[kernel.kallsyms]] (16,706,036 samples, 0.02%) - +[[kernel.kallsyms]] (25,484,485 samples, 0.05%) + -dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >::get (28,761,838,986 samples, 30.38%) -dml::handler<dml::mem_copy_operation, std::alloc.. +[[kernel.kallsyms]] (26,297,644 samples, 0.05%) + -__libc_openat64 (18,134,425 samples, 0.02%) - +[[kernel.kallsyms]] (259,363,157 samples, 0.46%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +_int_malloc (10,813,731 samples, 0.02%) + -std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::clear (28,764,433,921 samples, 30.38%) -std::unordered_map<unsigned char*, dsacache::Cac.. +[[kernel.kallsyms]] (9,113,218 samples, 0.02%) + -__GI___close_nocancel (12,187,019 samples, 0.01%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +[[kernel.kallsyms]] (80,579,994 samples, 0.14%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +[[kernel.kallsyms]] (7,344,388 samples, 0.01%) + -[[kernel.kallsyms]] (12,520,276 samples, 0.01%) - +[[kernel.kallsyms]] (8,004,974 samples, 0.01%) + -[[kernel.kallsyms]] (17,230,177 samples, 0.02%) - +[[kernel.kallsyms]] (20,280,664 samples, 0.04%) + -__GI_munmap (57,393,658 samples, 0.06%) - +dml::core::dispatcher::hw_dispatcher::initialize_hw (23,965,836 samples, 0.04%) + -[[kernel.kallsyms]] (32,564,987 samples, 0.03%) - +[[kernel.kallsyms]] (25,484,485 samples, 0.05%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (50,149,163 samples, 0.09%) + -[[kernel.kallsyms]] (11,325,305 samples, 0.01%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (33,465,879 samples, 0.04%) - +[[kernel.kallsyms]] (256,851,116 samples, 0.46%) + -[[kernel.kallsyms]] (9,949,805 samples, 0.01%) - +[[kernel.kallsyms]] (7,604,051 samples, 0.01%) + -Vector_Loader<unsigned long, (254,936,384 samples, 0.27%) - +[[kernel.kallsyms]] (34,047,863 samples, 0.06%) + -__GI___libc_read (33,664,059 samples, 0.04%) - +__GI__IO_file_open (17,566,875 samples, 0.03%) + -syscall (138,487,524 samples, 0.15%) - +[[kernel.kallsyms]] (93,967,068 samples, 0.17%) + -[[kernel.kallsyms]] (7,364,103,227 samples, 7.78%) -[[kernel.k.. +[[kernel.kallsyms]] (14,391,559 samples, 0.03%) + -dsacache::CacheData::~CacheData (1,966,417,818 samples, 2.08%) -d.. +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + -[[kernel.kallsyms]] (26,513,138 samples, 0.03%) - +dml::core::dispatcher::hw_queue::initialize_new_queue (4,914,889 samples, 0.01%) + -[[kernel.kallsyms]] (15,898,104 samples, 0.02%) - +grow_heap (9,972,283 samples, 0.02%) + -__libc_open64 (28,967,395 samples, 0.03%) - +[[kernel.kallsyms]] (26,783,041 samples, 0.05%) + -[[kernel.kallsyms]] (12,123,111 samples, 0.01%) - +[[kernel.kallsyms]] (7,785,866 samples, 0.01%) + -[[kernel.kallsyms]] (13,375,110 samples, 0.01%) - +[[kernel.kallsyms]] (7,344,388 samples, 0.01%) + -scan_a (8,704,824,691 samples, 9.19%) -scan_a +[[kernel.kallsyms]] (8,650,216 samples, 0.02%) + -[[kernel.kallsyms]] (13,375,110 samples, 0.01%) - +[[kernel.kallsyms]] (18,034,607 samples, 0.03%) + -[[kernel.kallsyms]] (12,989,541 samples, 0.01%) - +dsacache::CacheData::WaitOnCompletion (27,884,008,166 samples, 49.61%) +dsacache::CacheData::WaitOnCompletion -[[kernel.kallsyms]] (338,949,020 samples, 0.36%) - +[[kernel.kallsyms]] (16,912,912 samples, 0.03%) + -__GI_madvise (34,320,994 samples, 0.04%) - +[[kernel.kallsyms]] (5,740,414 samples, 0.01%) + -[[kernel.kallsyms]] (1,682,876,214 samples, 1.78%) - +_int_memalign (52,769,643 samples, 0.09%) + -[[kernel.kallsyms]] (16,726,677 samples, 0.02%) - +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + -[[kernel.kallsyms]] (138,263,108 samples, 0.15%) - +[[kernel.kallsyms]] (50,154,117 samples, 0.09%) + -[[kernel.kallsyms]] (11,262,173 samples, 0.01%) - +[[kernel.kallsyms]] (7,316,287,736 samples, 13.02%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (8,658,743 samples, 0.01%) - +scan_b (201,442,301 samples, 0.36%) + -[[kernel.kallsyms]] (344,116,366 samples, 0.36%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (46,717,102 samples, 0.08%) + -[[kernel.kallsyms]] (33,066,692 samples, 0.03%) - +dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (52,769,643 samples, 0.09%) + -[[kernel.kallsyms]] (28,967,395 samples, 0.03%) - +[[kernel.kallsyms]] (24,636,857 samples, 0.04%) + -[[kernel.kallsyms]] (30,443,956 samples, 0.03%) - +[[kernel.kallsyms]] (5,197,845 samples, 0.01%) + -[[kernel.kallsyms]] (15,136,135 samples, 0.02%) - +_int_malloc (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (107,851,474 samples, 0.11%) - +[[kernel.kallsyms]] (7,785,866 samples, 0.01%) + -[[kernel.kallsyms]] (176,806,975 samples, 0.19%) - +sysmalloc (52,769,643 samples, 0.09%) + -[[kernel.kallsyms]] (20,787,998 samples, 0.02%) - +[[kernel.kallsyms]] (25,484,485 samples, 0.05%) + -[[kernel.kallsyms]] (21,755,396 samples, 0.02%) - +[[kernel.kallsyms]] (19,162,146 samples, 0.03%) + -[[kernel.kallsyms]] (10,010,002 samples, 0.01%) - +[[kernel.kallsyms]] (34,047,863 samples, 0.06%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (25,918,503 samples, 0.05%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +main (47,031,727,484 samples, 83.69%) +main -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +Filter<unsigned long, LT, (3,458,098,462 samples, 6.15%) +Filter<u.. -[[kernel.kallsyms]] (245,227,514 samples, 0.26%) - +[[kernel.kallsyms]] (6,835,870 samples, 0.01%) + -[[kernel.kallsyms]] (13,369,508 samples, 0.01%) - +[[kernel.kallsyms]] (14,838,521 samples, 0.03%) + -[[kernel.kallsyms]] (22,968,243 samples, 0.02%) - +[[kernel.kallsyms]] (8,356,058 samples, 0.01%) + -[[kernel.kallsyms]] (40,211,614 samples, 0.04%) - +accfg_wq_get_first (16,212,211 samples, 0.03%) + -std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_node (28,764,433,921 samples, 30.38%) -std::__detail::_Hashtable_alloc<std::allocator<s.. +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (46,717,102 samples, 0.08%) + -__GI___mmap64 (177,643,122 samples, 0.19%) - +[[kernel.kallsyms]] (90,779,509 samples, 0.16%) + -[[kernel.kallsyms]] (8,859,224 samples, 0.01%) - +[[kernel.kallsyms]] (9,513,431 samples, 0.02%) + -[[kernel.kallsyms]] (33,465,879 samples, 0.04%) - +[[kernel.kallsyms]] (15,074,171 samples, 0.03%) + -[[kernel.kallsyms]] (248,672,554 samples, 0.26%) - +__GI___nptl_deallocate_stack (6,540,584 samples, 0.01%) + -dsacache::Cache::Clear (28,764,433,921 samples, 30.38%) -dsacache::Cache::Clear +[[kernel.kallsyms]] (15,623,013 samples, 0.03%) + -[[kernel.kallsyms]] (15,063,068 samples, 0.02%) - +[[kernel.kallsyms]] (8,817,500 samples, 0.02%) + -[[kernel.kallsyms]] (12,123,111 samples, 0.01%) - +dsacache::Cache::ExecuteCopy (78,330,121 samples, 0.14%) + -[[kernel.kallsyms]] (32,564,987 samples, 0.03%) - +sysmalloc (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (9,678,436 samples, 0.01%) - +[[kernel.kallsyms]] (7,229,598 samples, 0.01%) + -[[kernel.kallsyms]] (11,360,272 samples, 0.01%) - +[[kernel.kallsyms]] (6,370,691 samples, 0.01%) + -[[kernel.kallsyms]] (15,956,913 samples, 0.02%) - +[[kernel.kallsyms]] (179,023,692 samples, 0.32%) + -[[kernel.kallsyms]] (137,415,380 samples, 0.15%) - +dsacache::Cache::Clear (27,978,244,770 samples, 49.78%) +dsacache::Cache::Clear -syscall (32,926,402 samples, 0.03%) - +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + -[[kernel.kallsyms]] (176,806,975 samples, 0.19%) - +[[kernel.kallsyms]] (22,333,273 samples, 0.04%) + -[[kernel.kallsyms]] (33,465,879 samples, 0.04%) - +[[kernel.kallsyms]] (16,822,016 samples, 0.03%) + -[[kernel.kallsyms]] (88,681,765 samples, 0.09%) - +[[kernel.kallsyms]] (10,378,720 samples, 0.02%) + -[[kernel.kallsyms]] (95,400,273 samples, 0.10%) - +allocate_stack (12,724,581 samples, 0.02%) + -void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > (28,764,433,921 samples, 30.38%) -void std::destroy_at<std::pair<unsigned char* co.. +[[kernel.kallsyms]] (12,973,510 samples, 0.02%) + -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + -__GI_munmap (344,513,058 samples, 0.36%) - +[[kernel.kallsyms]] (32,804,268 samples, 0.06%) + -_int_malloc (8,250,299 samples, 0.01%) - +[[kernel.kallsyms]] (12,330,911 samples, 0.02%) + -[[kernel.kallsyms]] (11,377,500 samples, 0.01%) - +aggr_j (2,905,466,537 samples, 5.17%) +aggr_j -[[kernel.kallsyms]] (131,601,705 samples, 0.14%) - +[[kernel.kallsyms]] (25,315,610 samples, 0.05%) + -[[kernel.kallsyms]] (10,376,760 samples, 0.01%) - +[[kernel.kallsyms]] (32,804,268 samples, 0.06%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +[[kernel.kallsyms]] (7,785,866 samples, 0.01%) + -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +numa_alloc_onnode (146,910,655 samples, 0.26%) + -[[kernel.kallsyms]] (32,564,987 samples, 0.03%) - +[[kernel.kallsyms]] (21,152,354 samples, 0.04%) + -[[kernel.kallsyms]] (48,182,350 samples, 0.05%) - +dml::detail::ml::task<std::allocator<unsigned char> >::task (10,813,731 samples, 0.02%) + -std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_nodes (1,966,417,818 samples, 2.08%) -s.. +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + -[[kernel.kallsyms]] (14,195,296 samples, 0.01%) - +[[kernel.kallsyms]] (45,659,261 samples, 0.08%) + -[libstdc++.so.6.0.32] (11,864,179,368 samples, 12.53%) -[libstdc++.so.6.0... +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + -[[kernel.kallsyms]] (8,063,816 samples, 0.01%) - +[[kernel.kallsyms]] (7,104,752 samples, 0.01%) + -[[kernel.kallsyms]] (15,980,460 samples, 0.02%) - +_int_memalign (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +[[kernel.kallsyms]] (7,169,816 samples, 0.01%) + -[[kernel.kallsyms]] (15,956,913 samples, 0.02%) - +[[kernel.kallsyms]] (48,567,551 samples, 0.09%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +[[kernel.kallsyms]] (52,197,849 samples, 0.09%) + -[[kernel.kallsyms]] (11,262,173 samples, 0.01%) - +auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (78,330,121 samples, 0.14%) + -void fill_mt<unsigned long> (318,356,350 samples, 0.34%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (11,234,884,028 samples, 11.87%) -unsigned long std.. +[[kernel.kallsyms]] (60,893,681 samples, 0.11%) + -[[kernel.kallsyms]] (104,828,295 samples, 0.11%) - +[[kernel.kallsyms]] (80,579,994 samples, 0.14%) + -[[kernel.kallsyms]] (246,862,154 samples, 0.26%) - +__nptl_free_stacks (6,540,584 samples, 0.01%) + -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +[[kernel.kallsyms]] (259,363,157 samples, 0.46%) + -[[kernel.kallsyms]] (5,081,416,893 samples, 5.37%) -[[kern.. +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_nodes (27,978,244,770 samples, 49.78%) +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pa.. -[[kernel.kallsyms]] (27,540,952 samples, 0.03%) - +[[kernel.kallsyms]] (50,154,117 samples, 0.09%) + -[[kernel.kallsyms]] (344,116,366 samples, 0.36%) - +__GI___libc_read (6,155,254 samples, 0.01%) + -dsacache::CacheData::WaitOnCompletion (28,761,838,986 samples, 30.38%) -dsacache::CacheData::WaitOnCompletion +__GI___mmap64 (93,967,068 samples, 0.17%) + -[[kernel.kallsyms]] (1,678,981,470 samples, 1.77%) - +mbind (26,878,702 samples, 0.05%) + -QDPBench (94,672,108,664 samples, 99.98%) -QDPBench +[[kernel.kallsyms]] (12,902,294 samples, 0.02%) + -[[kernel.kallsyms]] (8,659,826 samples, 0.01%) - +void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::destroy<std::pair<unsigned char* const, dsacache::CacheData> > (27,978,244,770 samples, 49.78%) +void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<un.. -void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::destroy<std::pair<unsigned char* const, dsacache::CacheData> > (28,764,433,921 samples, 30.38%) -void std::allocator_traits<std::allocator<std::_.. +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + -[[kernel.kallsyms]] (17,322,586 samples, 0.02%) - +[[kernel.kallsyms]] (13,838,204 samples, 0.02%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +Aggregation<unsigned long, Sum, (2,493,804,416 samples, 4.44%) +Aggre.. -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +[[kernel.kallsyms]] (49,739,189 samples, 0.09%) + -[[kernel.kallsyms]] (344,116,366 samples, 0.36%) - +[[kernel.kallsyms]] (54,854,853 samples, 0.10%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +[[kernel.kallsyms]] (44,111,847 samples, 0.08%) + -[[kernel.kallsyms]] (26,141,703 samples, 0.03%) - +[[kernel.kallsyms]] (43,636,433 samples, 0.08%) + -[[kernel.kallsyms]] (14,935,581 samples, 0.02%) - +numa_node_size64 (59,279,404 samples, 0.11%) + -[[kernel.kallsyms]] (11,262,173 samples, 0.01%) - +[[kernel.kallsyms]] (76,833,477 samples, 0.14%) + -[[kernel.kallsyms]] (9,537,190 samples, 0.01%) - +_mm512_stream_load_si512 (1,931,677,068 samples, 3.44%) +_mm.. -[[kernel.kallsyms]] (1,682,036,018 samples, 1.78%) - +[[stack]] (1,230,514,799 samples, 2.19%) +[.. -[[kernel.kallsyms]] (15,037,349 samples, 0.02%) - +[[kernel.kallsyms]] (10,494,303 samples, 0.02%) + -[[kernel.kallsyms]] (315,357,590 samples, 0.33%) - +[[kernel.kallsyms]] (7,785,866 samples, 0.01%) + -[libstdc++.so.6.0.32] (34,054,556 samples, 0.04%) - +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + -dsacache::CacheData::WaitOnCompletion (1,966,417,818 samples, 2.08%) -d.. +[[kernel.kallsyms]] (24,405,965 samples, 0.04%) + -[[kernel.kallsyms]] (1,657,817,030 samples, 1.75%) - +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -sum_check (1,353,759,950 samples, 1.43%) - +[[kernel.kallsyms]] (9,512,265 samples, 0.02%) + -[[kernel.kallsyms]] (15,447,695 samples, 0.02%) - +dsacache::Cache::Access (319,122,983 samples, 0.57%) + -[[kernel.kallsyms]] (32,926,402 samples, 0.03%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (8,063,816 samples, 0.01%) - +[[kernel.kallsyms]] (5,740,414 samples, 0.01%) + -[[kernel.kallsyms]] (19,051,325 samples, 0.02%) - +[[kernel.kallsyms]] (78,924,962 samples, 0.14%) + -[[kernel.kallsyms]] (7,360,638,360 samples, 7.77%) -[[kernel.k.. +[[kernel.kallsyms]] (93,967,068 samples, 0.17%) + -Sum<unsigned long>::simd_agg (385,702,703 samples, 0.41%) - +[[kernel.kallsyms]] (22,459,290 samples, 0.04%) + -[[kernel.kallsyms]] (175,150,834 samples, 0.18%) - +[[kernel.kallsyms]] (32,804,268 samples, 0.06%) + -[[kernel.kallsyms]] (129,657,231 samples, 0.14%) - +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + -main (30,118,193,871 samples, 31.81%) -main +[[kernel.kallsyms]] (12,108,425 samples, 0.02%) + -__libc_start_call_main (23,727,804,096 samples, 25.06%) -__libc_start_call_main +[[kernel.kallsyms]] (256,323,849 samples, 0.46%) + -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (52,769,643 samples, 0.09%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +__GI___libc_read (6,155,254 samples, 0.01%) + -[[kernel.kallsyms]] (30,034,265 samples, 0.03%) - +operator new (46,717,102 samples, 0.08%) + -[[kernel.kallsyms]] (31,615,276 samples, 0.03%) - +dml::core::hardware_device::submit (25,560,478 samples, 0.05%) + -[[kernel.kallsyms]] (319,402,286 samples, 0.34%) - +_int_malloc (45,810,524 samples, 0.08%) + -clone3 (104,828,295 samples, 0.11%) - +[[kernel.kallsyms]] (14,391,559 samples, 0.03%) + -[[kernel.kallsyms]] (12,123,111 samples, 0.01%) - +std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::clear (27,978,244,770 samples, 49.78%) +std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*.. -[[kernel.kallsyms]] (11,696,008 samples, 0.01%) - +dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (49,871,202 samples, 0.09%) + -[[kernel.kallsyms]] (13,455,441 samples, 0.01%) - +[[kernel.kallsyms]] (5,197,845 samples, 0.01%) + -main (318,356,350 samples, 0.34%) - +[[kernel.kallsyms]] (15,189,233 samples, 0.03%) + -sysmalloc (9,949,805 samples, 0.01%) - +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -[[kernel.kallsyms]] (23,517,953 samples, 0.02%) - +__GI_munmap (56,894,061 samples, 0.10%) + -[[kernel.kallsyms]] (1,680,680,697 samples, 1.77%) - +[[kernel.kallsyms]] (13,011,844 samples, 0.02%) + -[[kernel.kallsyms]] (7,358,907,042 samples, 7.77%) -[[kernel.k.. +[[kernel.kallsyms]] (9,512,265 samples, 0.02%) + -[unknown] (30,249,212,035 samples, 31.95%) -[unknown] +operator new (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (23,405,713 samples, 0.02%) - +[[kernel.kallsyms]] (91,643,182 samples, 0.16%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (49,896,815 samples, 0.05%) - +[[kernel.kallsyms]] (67,470,988 samples, 0.12%) + -[[kernel.kallsyms]] (7,361,504,795 samples, 7.77%) -[[kernel.k.. +[[kernel.kallsyms]] (7,929,584 samples, 0.01%) + -[[kernel.kallsyms]] (93,052,024 samples, 0.10%) - +[[kernel.kallsyms]] (6,835,870 samples, 0.01%) + -[[kernel.kallsyms]] (261,422,820 samples, 0.28%) - +[[kernel.kallsyms]] (7,344,388 samples, 0.01%) + -[[kernel.kallsyms]] (96,511,467 samples, 0.10%) - +[[kernel.kallsyms]] (6,785,217 samples, 0.01%) + -[[kernel.kallsyms]] (8,659,826 samples, 0.01%) - +[[kernel.kallsyms]] (8,650,216 samples, 0.02%) + -[[kernel.kallsyms]] (8,659,826 samples, 0.01%) - +[[kernel.kallsyms]] (7,344,388 samples, 0.01%) + -[[kernel.kallsyms]] (246,862,154 samples, 0.26%) - +[[kernel.kallsyms]] (7,030,285 samples, 0.01%) + -[[kernel.kallsyms]] (15,980,460 samples, 0.02%) - +[[kernel.kallsyms]] (7,450,133 samples, 0.01%) + -[[kernel.kallsyms]] (9,406,638 samples, 0.01%) - +accfg_get_param_long (9,811,793 samples, 0.02%) + -[[kernel.kallsyms]] (33,465,879 samples, 0.04%) - +[[kernel.kallsyms]] (29,377,009 samples, 0.05%) + -std::pair<unsigned char* const, dsacache::CacheData>::~pair (28,764,433,921 samples, 30.38%) -std::pair<unsigned char* const, dsacache::CacheD.. +[[kernel.kallsyms]] (19,162,146 samples, 0.03%) + -[[kernel.kallsyms]] (28,967,395 samples, 0.03%) - +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + -[[kernel.kallsyms]] (58,838,049 samples, 0.06%) - +[[kernel.kallsyms]] (11,362,923 samples, 0.02%) + -[[kernel.kallsyms]] (175,150,834 samples, 0.18%) - +_IO_new_file_underflow (6,564,990 samples, 0.01%) + -[[kernel.kallsyms]] (8,634,907 samples, 0.01%) - +[[kernel.kallsyms]] (50,154,117 samples, 0.09%) + -[[kernel.kallsyms]] (28,513,818 samples, 0.03%) - +[[kernel.kallsyms]] (6,774,676 samples, 0.01%) + -[[kernel.kallsyms]] (177,643,122 samples, 0.19%) - +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + -std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_nodes (28,764,433,921 samples, 30.38%) -std::__detail::_Hashtable_alloc<std::allocator<s.. +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (52,769,643 samples, 0.09%) + -[[kernel.kallsyms]] (15,956,913 samples, 0.02%) - +[[kernel.kallsyms]] (25,315,610 samples, 0.05%) + -[[kernel.kallsyms]] (104,828,295 samples, 0.11%) - +[[kernel.kallsyms]] (6,357,006 samples, 0.01%) + -[[kernel.kallsyms]] (31,960,699 samples, 0.03%) - +[[kernel.kallsyms]] (16,010,002 samples, 0.03%) + -void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::destroy<std::pair<unsigned char* const, dsacache::CacheData> > (1,966,417,818 samples, 2.08%) -v.. +__libc_start_call_main (47,034,233,602 samples, 83.69%) +__libc_start_call_main -[[kernel.kallsyms]] (27,158,879 samples, 0.03%) - +dsacache::Cache::GetCacheNode (28,516,368 samples, 0.05%) + -[[kernel.kallsyms]] (31,598,409 samples, 0.03%) - +void fill_mt<unsigned long> (17,603,955,046 samples, 31.32%) +void fill_mt<unsigned long> -[[kernel.kallsyms]] (104,828,295 samples, 0.11%) - +auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (49,871,202 samples, 0.09%) + -[[kernel.kallsyms]] (11,262,173 samples, 0.01%) - +[[kernel.kallsyms]] (13,838,204 samples, 0.02%) + -[[kernel.kallsyms]] (135,625,471 samples, 0.14%) - +__GI___libc_read (15,623,013 samples, 0.03%) + -[[kernel.kallsyms]] (9,447,981 samples, 0.01%) - +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (46,717,102 samples, 0.08%) + -[[kernel.kallsyms]] (28,967,395 samples, 0.03%) - +[[kernel.kallsyms]] (26,878,702 samples, 0.05%) + -[[kernel.kallsyms]] (32,926,402 samples, 0.03%) - +[[kernel.kallsyms]] (26,878,702 samples, 0.05%) + -dsacache::CacheData::WaitOnCompletion (28,763,569,382 samples, 30.38%) -dsacache::CacheData::WaitOnCompletion +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -[[kernel.kallsyms]] (12,989,541 samples, 0.01%) - +_IO_new_fclose (8,909,079 samples, 0.02%) + -[[kernel.kallsyms]] (32,926,402 samples, 0.03%) - +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + -__GI_mprotect (249,174,735 samples, 0.26%) - +[[kernel.kallsyms]] (25,484,485 samples, 0.05%) + -_mm512_mask_add_epi64 (385,702,703 samples, 0.41%) - +[[kernel.kallsyms]] (80,579,994 samples, 0.14%) + -[[kernel.kallsyms]] (10,670,752 samples, 0.01%) - +[[kernel.kallsyms]] (155,318,192 samples, 0.28%) + -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (5,368,469,078 samples, 5.67%) -std::me.. +add_wq (15,236,807 samples, 0.03%) + -[[kernel.kallsyms]] (13,166,222 samples, 0.01%) - +openat (6,835,870 samples, 0.01%) + -[[kernel.kallsyms]] (15,860,284 samples, 0.02%) - +[[kernel.kallsyms]] (7,785,866 samples, 0.01%) + -[[kernel.kallsyms]] (11,667,956 samples, 0.01%) - +std::thread::_M_start_thread (12,724,581 samples, 0.02%) + -[[kernel.kallsyms]] (6,143,399,957 samples, 6.49%) -[[kernel.. +sysmalloc (45,810,524 samples, 0.08%) + -[[kernel.kallsyms]] (11,840,194 samples, 0.01%) - +[[kernel.kallsyms]] (42,101,327 samples, 0.07%) + -aggr_j (3,149,422,702 samples, 3.33%) -agg.. +[[kernel.kallsyms]] (5,553,168 samples, 0.01%) + -[[kernel.kallsyms]] (100,513,983 samples, 0.11%) - +advise_stack_range (11,778,485 samples, 0.02%) + -[[kernel.kallsyms]] (18,442,842 samples, 0.02%) - +_int_memalign (46,717,102 samples, 0.08%) + -[[kernel.kallsyms]] (9,821,260 samples, 0.01%) - +[[kernel.kallsyms]] (88,186,671 samples, 0.16%) + -[[kernel.kallsyms]] (9,821,260 samples, 0.01%) - +dsacache::Cache::AllocOnNode (206,190,059 samples, 0.37%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +syscall (52,197,849 samples, 0.09%) + -[[kernel.kallsyms]] (918,088,258 samples, 0.97%) - +[[kernel.kallsyms]] (34,047,863 samples, 0.06%) + -[[kernel.kallsyms]] (36,110,247 samples, 0.04%) - +_mm512_cmplt_epi64_mask (26,906,506 samples, 0.05%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + -[[kernel.kallsyms]] (98,275,531 samples, 0.10%) - +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (5,869,681 samples, 0.01%) + -[[kernel.kallsyms]] (31,978,623 samples, 0.03%) - +[[kernel.kallsyms]] (7,323,204,071 samples, 13.03%) +[[kernel.kallsyms]] -[[kernel.kallsyms]] (104,828,295 samples, 0.11%) - +[[kernel.kallsyms]] (8,650,216 samples, 0.02%) + -[[kernel.kallsyms]] (16,726,677 samples, 0.02%) - +start_thread (6,580,772,960 samples, 11.71%) +start_thread -start_thread (11,866,281,284 samples, 12.53%) -start_thread +dml::detail::ml::task<std::allocator<unsigned char> >::task (46,717,102 samples, 0.08%) + -[[kernel.kallsyms]] (13,855,268 samples, 0.01%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (1,623,195,042 samples, 2.89%) +st.. -[[kernel.kallsyms]] (28,967,395 samples, 0.03%) - +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (52,769,643 samples, 0.09%) + -dml_wait_busy_poll (28,760,234,712 samples, 30.37%) -dml_wait_busy_poll +[[kernel.kallsyms]] (50,154,117 samples, 0.09%) + -[[kernel.kallsyms]] (62,804,408 samples, 0.07%) - +[[kernel.kallsyms]] (16,480,213 samples, 0.03%) + -[[kernel.kallsyms]] (10,392,609 samples, 0.01%) - +mbind (52,197,849 samples, 0.09%) + -[[kernel.kallsyms]] (9,447,981 samples, 0.01%) - +[[kernel.kallsyms]] (10,757,457 samples, 0.02%) + -scan_b (9,550,609 samples, 0.01%) - +[[kernel.kallsyms]] (5,997,398 samples, 0.01%) + -[[kernel.kallsyms]] (17,433,636 samples, 0.02%) - +[[kernel.kallsyms]] (8,817,500 samples, 0.02%) + -[[kernel.kallsyms]] (165,965,854 samples, 0.18%) - +[[kernel.kallsyms]] (18,346,934 samples, 0.03%) + -[[kernel.kallsyms]] (17,299,154 samples, 0.02%) - +[[kernel.kallsyms]] (25,918,503 samples, 0.05%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (318,356,350 samples, 0.34%) - +Sum<unsigned long>::simd_agg (414,838,602 samples, 0.74%) + -[[kernel.kallsyms]] (10,391,117 samples, 0.01%) - +[[kernel.kallsyms]] (34,047,863 samples, 0.06%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +__GI_munmap (80,579,994 samples, 0.14%) + -dsacache::CacheData::~CacheData (28,764,433,921 samples, 30.38%) -dsacache::CacheData::~CacheData +[[kernel.kallsyms]] (92,507,760 samples, 0.16%) + -[[kernel.kallsyms]] (10,527,197 samples, 0.01%) - +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -all (94,687,755,116 samples, 100%) - +[[kernel.kallsyms]] (5,002,702,867 samples, 8.90%) +[[kernel.kal.. -std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_node (1,966,417,818 samples, 2.08%) -s.. +[[kernel.kallsyms]] (32,408,072 samples, 0.06%) + -Aggregation<unsigned long, Sum, (3,141,476,506 samples, 3.32%) -Agg.. +[[kernel.kallsyms]] (9,123,981 samples, 0.02%) + -[[kernel.kallsyms]] (241,405,457 samples, 0.25%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (1,674,994,497 samples, 1.77%) - +__GI_munmap (6,540,584 samples, 0.01%) + -[[kernel.kallsyms]] (104,828,295 samples, 0.11%) - +_int_malloc (52,769,643 samples, 0.09%) + -sum_check (1,366,734,703 samples, 1.44%) - +[[kernel.kallsyms]] (44,111,847 samples, 0.08%) + -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (1,706,155,645 samples, 1.80%) -s.. +__GI___mmap64 (32,804,268 samples, 0.06%) + -std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::clear (1,966,417,818 samples, 2.08%) -s.. +[[kernel.kallsyms]] (85,434,447 samples, 0.15%) + -[[kernel.kallsyms]] (138,263,108 samples, 0.15%) - +Vector_Loader<unsigned long, (2,993,062,044 samples, 5.33%) +Vector.. -[[kernel.kallsyms]] (20,434,777 samples, 0.02%) - +[[kernel.kallsyms]] (7,785,866 samples, 0.01%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + -[[kernel.kallsyms]] (344,084,005 samples, 0.36%) - +grow_heap (44,111,847 samples, 0.08%) + -[[kernel.kallsyms]] (344,116,366 samples, 0.36%) - +_mid_memalign (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (88,814,362 samples, 0.09%) - +_start (47,036,380,628 samples, 83.69%) +_start -[[kernel.kallsyms]] (53,949,804 samples, 0.06%) - +[[kernel.kallsyms]] (25,918,503 samples, 0.05%) + -[[kernel.kallsyms]] (86,960,164 samples, 0.09%) - +dsacache::CacheData::Deallocate (92,507,760 samples, 0.16%) + -sudo (11,865,829 samples, 0.01%) - +[[kernel.kallsyms]] (19,162,146 samples, 0.03%) + -__GI___libc_read (33,519,380 samples, 0.04%) - +[[kernel.kallsyms]] (8,551,434 samples, 0.02%) + -[[kernel.kallsyms]] (246,766,639 samples, 0.26%) - +[[kernel.kallsyms]] (5,543,739 samples, 0.01%) + -dsacache::Cache::Access (8,260,043 samples, 0.01%) - +[[kernel.kallsyms]] (15,896,933 samples, 0.03%) + -[[kernel.kallsyms]] (278,685,785 samples, 0.29%) - +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + -[[kernel.kallsyms]] (29,649,044 samples, 0.03%) - +_IO_new_file_fopen (17,566,875 samples, 0.03%) + -std::pair<unsigned char* const, dsacache::CacheData>::~pair (1,966,417,818 samples, 2.08%) -s.. +__fopen_internal (17,566,875 samples, 0.03%) + -[[kernel.kallsyms]] (53,341,359 samples, 0.06%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (1,226,127,583 samples, 2.18%) +u.. -[[kernel.kallsyms]] (19,133,036 samples, 0.02%) - +grow_heap (34,047,863 samples, 0.06%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (10,376,330 samples, 0.02%) + -dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >::get (1,966,417,818 samples, 2.08%) -d.. +[[kernel.kallsyms]] (35,593,800 samples, 0.06%) + -[[kernel.kallsyms]] (33,465,879 samples, 0.04%) - +std::thread::thread<void (12,724,581 samples, 0.02%) + -[[kernel.kallsyms]] (865,989,379 samples, 0.91%) - +[[kernel.kallsyms]] (41,592,934 samples, 0.07%) + -[[kernel.kallsyms]] (12,582,825 samples, 0.01%) - +dsacache::Cache::Access (199,605,584 samples, 0.36%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (8,630,962 samples, 0.02%) + -[[kernel.kallsyms]] (28,967,395 samples, 0.03%) - +[[kernel.kallsyms]] (25,315,610 samples, 0.05%) + -[[kernel.kallsyms]] (24,861,291 samples, 0.03%) - +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + -[[kernel.kallsyms]] (8,659,826 samples, 0.01%) - +[[kernel.kallsyms]] (10,757,457 samples, 0.02%) + -[[kernel.kallsyms]] (7,363,236,866 samples, 7.78%) -[[kernel.k.. +[[kernel.kallsyms]] (9,972,283 samples, 0.02%) + -__pthread_create_2_1 (53,213,161 samples, 0.06%) - +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -[[kernel.kallsyms]] (12,127,517 samples, 0.01%) - +[[kernel.kallsyms]] (5,740,414 samples, 0.01%) + -[[kernel.kallsyms]] (151,341,840 samples, 0.16%) - +_mid_memalign (10,813,731 samples, 0.02%) + -[[kernel.kallsyms]] (246,862,154 samples, 0.26%) - +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + -[[kernel.kallsyms]] (7,283,945,970 samples, 7.69%) -[[kernel.k.. +dsacache::CacheData::WaitOnCompletion (27,883,144,753 samples, 49.61%) +dsacache::CacheData::WaitOnCompletion -[[kernel.kallsyms]] (299,429,858 samples, 0.32%) - +[[kernel.kallsyms]] (5,109,987 samples, 0.01%) + -[[kernel.kallsyms]] (9,076,738 samples, 0.01%) - +[[kernel.kallsyms]] (17,265,048 samples, 0.03%) + -[[kernel.kallsyms]] (10,874,429 samples, 0.01%) - +dml::detail::ml::task<std::allocator<unsigned char> >::task (35,828,488 samples, 0.06%) + -[[kernel.kallsyms]] (43,921,440 samples, 0.05%) - +[[kernel.kallsyms]] (25,484,485 samples, 0.05%) + -main (23,726,079,098 samples, 25.06%) -main +__GI_munmap (92,507,760 samples, 0.16%) + -allocate_stack (53,213,161 samples, 0.06%) - +[[kernel.kallsyms]] (93,967,068 samples, 0.17%) + -[[kernel.kallsyms]] (57,393,658 samples, 0.06%) - +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + -[[kernel.kallsyms]] (12,989,541 samples, 0.01%) - +[[kernel.kallsyms]] (9,919,156 samples, 0.02%) + -[[kernel.kallsyms]] (30,871,550 samples, 0.03%) - +[[kernel.kallsyms]] (25,315,610 samples, 0.05%) + -std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::clear (28,764,433,921 samples, 30.38%) -std::_Hashtable<unsigned char*, std::pair<unsign.. +[[kernel.kallsyms]] (5,997,398 samples, 0.01%) + -[[kernel.kallsyms]] (21,755,396 samples, 0.02%) - +[[kernel.kallsyms]] (8,020,276 samples, 0.01%) + -[[kernel.kallsyms]] (138,487,524 samples, 0.15%) - +[[kernel.kallsyms]] (10,757,457 samples, 0.02%) + -[[kernel.kallsyms]] (42,144,265 samples, 0.04%) - +__GI___close_nocancel (7,344,388 samples, 0.01%) + -[[kernel.kallsyms]] (25,018,958 samples, 0.03%) - +QDPBench (56,173,802,251 samples, 99.95%) +QDPBench -[[kernel.kallsyms]] (7,363,236,866 samples, 7.78%) -[[kernel.k.. +dml::core::dispatcher::hw_device::initialize_new_device (21,127,100 samples, 0.04%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (11,293,074,428 samples, 11.93%) -unsigned long std.. +[[kernel.kallsyms]] (5,740,414 samples, 0.01%) + -[[kernel.kallsyms]] (12,123,111 samples, 0.01%) - +operator new (10,813,731 samples, 0.02%) + -[[kernel.kallsyms]] (15,550,525 samples, 0.02%) - +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (5,144,892,302 samples, 9.15%) +unsigned int .. -std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::clear (1,966,417,818 samples, 2.08%) -s.. +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (318,356,350 samples, 0.34%) - +[[kernel.kallsyms]] (8,400,747 samples, 0.01%) + -__GI___mmap64 (177,643,122 samples, 0.19%) - +[[kernel.kallsyms]] (5,740,414 samples, 0.01%) + -[[kernel.kallsyms]] (53,213,161 samples, 0.06%) - +[unknown] (1,234,157,360 samples, 2.20%) +[.. + + +[[kernel.kallsyms]] (6,357,006 samples, 0.01%) + + + +dml::detail::ml::impl::hardware::submit (25,560,478 samples, 0.05%) + + + +[[kernel.kallsyms]] (7,324,068,091 samples, 13.03%) +[[kernel.kallsyms]] + + +_IO_new_file_close_it (7,344,388 samples, 0.01%) + + + +LT<unsigned long>::simd_filter (26,906,506 samples, 0.05%) + + + +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (1,015,021,080 samples, 1.81%) +u.. + + +__GI___mmap64 (93,967,068 samples, 0.17%) + + + +[[kernel.kallsyms]] (8,650,216 samples, 0.02%) + + + +[[kernel.kallsyms]] (258,139,562 samples, 0.46%) + + + +__libc_openat64 (6,835,870 samples, 0.01%) + + + +queue_stack (6,540,584 samples, 0.01%) + + + +[[kernel.kallsyms]] (11,040,629 samples, 0.02%) + + + +[[kernel.kallsyms]] (183,101,380 samples, 0.33%) + + + +__libc_open64 (17,566,875 samples, 0.03%) + + + +[[kernel.kallsyms]] (26,878,702 samples, 0.05%) + + + +[[kernel.kallsyms]] (5,302,335 samples, 0.01%) + + + +[[kernel.kallsyms]] (11,866,122 samples, 0.02%) + + + +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (46,717,102 samples, 0.08%) + + + +_mm512_mask_add_epi64 (414,838,602 samples, 0.74%) + + + +std::pair<unsigned char* const, dsacache::CacheData>::~pair (27,978,244,770 samples, 49.78%) +std::pair<unsigned char* const, dsacache::CacheData>::~pair + + +[[kernel.kallsyms]] (33,581,259 samples, 0.06%) + + + +[[kernel.kallsyms]] (5,360,812 samples, 0.01%) + + + +[[kernel.kallsyms]] (32,112,996 samples, 0.06%) + + + +[[kernel.kallsyms]] (16,791,097 samples, 0.03%) + + + +[[kernel.kallsyms]] (6,799,118 samples, 0.01%) + + + +sudo (16,406,659 samples, 0.03%) + + + +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_node (27,978,244,770 samples, 49.78%) +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pa.. + + +wqs_init (16,212,211 samples, 0.03%) + + + +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (35,828,488 samples, 0.06%) + + + +[[kernel.kallsyms]] (25,918,503 samples, 0.05%) + + + +[[kernel.kallsyms]] (38,027,005 samples, 0.07%) + + + +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + + + +_mid_memalign (52,769,643 samples, 0.09%) + + + +[[kernel.kallsyms]] (16,791,097 samples, 0.03%) + + + +operator new (52,769,643 samples, 0.09%) + + + +std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::clear (27,978,244,770 samples, 49.78%) +std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheD.. + + +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + + + +dsacache::CacheData::WaitOnCompletion (84,126,226 samples, 0.15%) + + + +dsacache::Cache::GetCacheNode (23,082,624 samples, 0.04%) + + + +[[kernel.kallsyms]] (93,216,145 samples, 0.17%) + + + +dsacache::Cache::SubmitTask (25,560,478 samples, 0.05%) + + + +void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > (27,978,244,770 samples, 49.78%) +void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > + + +__GI___libc_read (15,623,013 samples, 0.03%) + + + +[[kernel.kallsyms]] (89,913,865 samples, 0.16%) + + + +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + + + +Vector_Loader<unsigned long, (1,931,677,068 samples, 3.44%) +Vec.. + + +[[kernel.kallsyms]] (11,089,814 samples, 0.02%) + + + +__GI_madvise (11,778,485 samples, 0.02%) + + + +syscall (26,878,702 samples, 0.05%) + + + +[[kernel.kallsyms]] (16,071,115 samples, 0.03%) + + + +[[kernel.kallsyms]] (19,961,046 samples, 0.04%) + + + +_mid_memalign (46,717,102 samples, 0.08%) + + + +syscall (25,315,610 samples, 0.05%) + + + +[[kernel.kallsyms]] (9,972,283 samples, 0.02%) + + + +[[kernel.kallsyms]] (19,107,386 samples, 0.03%) + + + +[[kernel.kallsyms]] (13,838,204 samples, 0.02%) + + + +decltype (12,724,581 samples, 0.02%) + + + +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (10,813,731 samples, 0.02%) + + + +[[kernel.kallsyms]] (13,514,538 samples, 0.02%) + + + +sum_check (1,371,232,114 samples, 2.44%) +su.. + + +[[kernel.kallsyms]] (9,972,283 samples, 0.02%) + + + +__GI___getdelim (22,226,508 samples, 0.04%) + + + +[[kernel.kallsyms]] (5,197,845 samples, 0.01%) + + + +[[kernel.kallsyms]] (27,648,058 samples, 0.05%) + + + +sysmalloc (10,813,731 samples, 0.02%) + + + +[[kernel.kallsyms]] (52,197,849 samples, 0.09%) + + + +[[kernel.kallsyms]] (5,740,414 samples, 0.01%) + + + +unsigned long std::uniform_int_distribution<unsigned long>::operator (7,679,240,842 samples, 13.66%) +unsigned long std::u.. + + +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (10,813,731 samples, 0.02%) + + + +[[kernel.kallsyms]] (25,430,139 samples, 0.05%) + + + +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (10,813,731 samples, 0.02%) + + + +[[kernel.kallsyms]] (6,347,448 samples, 0.01%) + + + +clone3 (6,606,257,445 samples, 11.75%) +clone3 + + +[[kernel.kallsyms]] (26,878,702 samples, 0.05%) + + + +__pthread_create_2_1 (12,724,581 samples, 0.02%) + + + +[[kernel.kallsyms]] (20,727,544 samples, 0.04%) + + + +dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >::handler (35,828,488 samples, 0.06%) + + + +[[kernel.kallsyms]] (30,153,610 samples, 0.05%) + + + +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + + + +[[kernel.kallsyms]] (93,967,068 samples, 0.17%) + + + +auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (46,717,102 samples, 0.08%) + + + +dml::core::dispatcher::hw_dispatcher::get_instance (25,315,981 samples, 0.05%) + + + +[[kernel.kallsyms]] (7,344,388 samples, 0.01%) + + + +__GI_mprotect (34,047,863 samples, 0.06%) + + + +[[kernel.kallsyms]] (9,972,283 samples, 0.02%) + + + +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + + + +__GI_mprotect (44,111,847 samples, 0.08%) + + + +numa_node_size64 (17,944,285 samples, 0.03%) + + + +[[kernel.kallsyms]] (6,835,870 samples, 0.01%) + + + +[[kernel.kallsyms]] (10,757,457 samples, 0.02%) + + + +scan_a (3,462,002,037 samples, 6.16%) +scan_a + + +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + + + +[[kernel.kallsyms]] (47,784,777 samples, 0.09%) + + + +[[kernel.kallsyms]] (11,243,448 samples, 0.02%) + + + +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + + + +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (35,828,488 samples, 0.06%) + + + +[[kernel.kallsyms]] (9,861,578 samples, 0.02%) + + + +dsacache::Cache::ExecuteCopy (49,871,202 samples, 0.09%) + + + +grow_heap (50,154,117 samples, 0.09%) + + + +[[kernel.kallsyms]] (24,140,143 samples, 0.04%) + + + +[[kernel.kallsyms]] (8,647,063 samples, 0.02%) + + + +[[kernel.kallsyms]] (7,318,881,921 samples, 13.02%) +[[kernel.kallsyms]] + + +[[kernel.kallsyms]] (6,540,584 samples, 0.01%) + + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (4,015,533,329 samples, 7.14%) +std::mers.. + + +[[kernel.kallsyms]] (10,376,330 samples, 0.02%) + + + +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + + + +[[kernel.kallsyms]] (9,861,578 samples, 0.02%) + + + +[libstdc++.so.6.0.32] (6,568,910,875 samples, 11.69%) +[libstdc++.so.6.0.. + + +[[kernel.kallsyms]] (5,197,845 samples, 0.01%) + + + +[[kernel.kallsyms]] (80,579,994 samples, 0.14%) + + + +[[kernel.kallsyms]] (11,778,485 samples, 0.02%) + + + +numa_alloc_onnode (59,682,970 samples, 0.11%) + + + +[[kernel.kallsyms]] (45,789,329 samples, 0.08%) + + + +[[kernel.kallsyms]] (50,154,117 samples, 0.09%) + + + +dml::core::dispatcher::hw_dispatcher::hw_dispatcher (23,965,836 samples, 0.04%) + + + +[[kernel.kallsyms]] (52,197,849 samples, 0.09%) + + + +_int_memalign (10,813,731 samples, 0.02%) + + + +__sysfs_device_parse (16,212,211 samples, 0.03%) + + + +[[kernel.kallsyms]] (92,507,760 samples, 0.16%) + + + +[[kernel.kallsyms]] (258,083,965 samples, 0.46%) + + + +[[kernel.kallsyms]] (5,333,154 samples, 0.01%) + + + +[[kernel.kallsyms]] (5,542,336 samples, 0.01%) + + + +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + + + +[[kernel.kallsyms]] (7,240,609,876 samples, 12.88%) +[[kernel.kallsyms]] + + +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + + + +dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (78,330,121 samples, 0.14%) + + + +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (52,769,643 samples, 0.09%) + + + +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + + + +[[kernel.kallsyms]] (80,579,994 samples, 0.14%) + + + +[[kernel.kallsyms]] (13,726,772 samples, 0.02%) + + + +[[kernel.kallsyms]] (4,860,205 samples, 0.01%) + + + +[[kernel.kallsyms]] (6,774,676 samples, 0.01%) + + + +[[kernel.kallsyms]] (33,198,896 samples, 0.06%) + + + +[[kernel.kallsyms]] (10,757,457 samples, 0.02%) + + + +[[kernel.kallsyms]] (31,728,822 samples, 0.06%) + + + +[[kernel.kallsyms]] (15,623,013 samples, 0.03%) + + + +[[kernel.kallsyms]] (82,999,130 samples, 0.15%) + + + +[[kernel.kallsyms]] (9,972,283 samples, 0.02%) + + + +[[kernel.kallsyms]] (4,837,589 samples, 0.01%) + + + +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + + + +[[kernel.kallsyms]] (25,484,485 samples, 0.05%) + + + +dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >::handler (10,813,731 samples, 0.02%) + + + +[[kernel.kallsyms]] (52,197,849 samples, 0.09%) + + + +__GI___getdelim (7,608,450 samples, 0.01%) + + + +dsacache::Cache::AllocOnNode (77,976,052 samples, 0.14%) + + + +dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >::get (27,883,144,753 samples, 49.61%) +dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >::get + + +[[kernel.kallsyms]] (13,547,223 samples, 0.02%) + + + +[[kernel.kallsyms]] (256,851,116 samples, 0.46%) + + + +[[kernel.kallsyms]] (89,913,865 samples, 0.16%) + + + +void std::allocator_traits<std::allocator<std::thread> >::construct<std::thread, void (12,724,581 samples, 0.02%) + + + +std::thread::join (6,839,397 samples, 0.01%) + + + +[[kernel.kallsyms]] (19,162,146 samples, 0.03%) + + + +[[kernel.kallsyms]] (15,623,013 samples, 0.03%) + + + +__pthread_clockjoin_ex (6,839,397 samples, 0.01%) + + + +_mm512_stream_load_si512 (2,993,062,044 samples, 5.33%) +_mm512.. + + +[[kernel.kallsyms]] (12,973,510 samples, 0.02%) + + + +dml::detail::ml::task<std::allocator<unsigned char> >::task (52,769,643 samples, 0.09%) + + + +[[kernel.kallsyms]] (90,779,509 samples, 0.16%) + + + +[[kernel.kallsyms]] (14,066,357 samples, 0.03%) + + + +[[kernel.kallsyms]] (15,623,013 samples, 0.03%) + + + +[[kernel.kallsyms]] (25,315,610 samples, 0.05%) + + + +[[kernel.kallsyms]] (11,870,294 samples, 0.02%) + + + +sh (5,618,491 samples, 0.01%) + + + +[[kernel.kallsyms]] (6,000,961,265 samples, 10.68%) +[[kernel.kallsy.. + + +[[kernel.kallsyms]] (6,835,870 samples, 0.01%) + + + +_IO_new_file_underflow (19,746,317 samples, 0.04%) + + + +[[kernel.kallsyms]] (15,896,933 samples, 0.03%) + + + +[[kernel.kallsyms]] (43,636,433 samples, 0.08%) + + + +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + + + +dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (46,717,102 samples, 0.08%) + + + +[[kernel.kallsyms]] (52,197,849 samples, 0.09%) + + + +[[kernel.kallsyms]] (23,722,633 samples, 0.04%) + + + +[[kernel.kallsyms]] (8,649,622 samples, 0.02%) + + + +all (56,200,887,845 samples, 100%) + + + +devices_init (6,615,510 samples, 0.01%) + + + +[[kernel.kallsyms]] (6,052,541 samples, 0.01%) + + + +[[kernel.kallsyms]] (6,835,870 samples, 0.01%) + + + +[[kernel.kallsyms]] (44,111,847 samples, 0.08%) + + + +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + + + +[[kernel.kallsyms]] (31,692,824 samples, 0.06%) + + + +[[kernel.kallsyms]] (11,169,708 samples, 0.02%) + + + +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (35,828,488 samples, 0.06%) + + + +[[kernel.kallsyms]] (6,066,302 samples, 0.01%) + + + +[[kernel.kallsyms]] (5,721,684 samples, 0.01%) + + + +[[kernel.kallsyms]] (7,311,966,115 samples, 13.01%) +[[kernel.kallsyms]] + + +[[kernel.kallsyms]] (92,507,760 samples, 0.16%) + + + +[[kernel.kallsyms]] (36,196,373 samples, 0.06%) + + + +[[kernel.kallsyms]] (15,896,933 samples, 0.03%) + + + +[[kernel.kallsyms]] (56,894,061 samples, 0.10%) + + + +[[kernel.kallsyms]] (259,363,157 samples, 0.46%) + + + +[[kernel.kallsyms]] (19,185,252 samples, 0.03%) + + + +[[kernel.kallsyms]] (9,972,283 samples, 0.02%) + + + +dsacache::Cache::SubmitTask (167,119,904 samples, 0.30%) + + + +dsacache::Cache::SubmitTask (291,889,749 samples, 0.52%) + + + +[anon] (52,831,798 samples, 0.09%) + + + +[[kernel.kallsyms]] (6,774,676 samples, 0.01%) + + + +[[kernel.kallsyms]] (26,878,702 samples, 0.05%) + + + +__GI___mmap64 (32,804,268 samples, 0.06%) + + + +[[kernel.kallsyms]] (8,424,524 samples, 0.01%) + + + +std::thread& std::vector<std::thread, std::allocator<std::thread> >::emplace_back<void (13,581,055 samples, 0.02%) + + + +[[kernel.kallsyms]] (7,324,068,091 samples, 13.03%) +[[kernel.kallsyms]] + + +[[kernel.kallsyms]] (13,838,204 samples, 0.02%) + + + +[[kernel.kallsyms]] (5,188,239 samples, 0.01%) + + + +[[kernel.kallsyms]] (13,025,872 samples, 0.02%) + + + +[[kernel.kallsyms]] (22,884,622 samples, 0.04%) +