From 94b3576d5aa127eb5f22d79deab75fc3dc7b2301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Constantin=20F=C3=BCrst?= Date: Wed, 7 Feb 2024 23:07:13 +0100 Subject: [PATCH] publish measurements from benchmark --- qdp_project/evaluation-results/perf.svg | 2604 +++++++++++++---- ...cb0-tcj1-tmul16-wl4294967296-cs2097152.csv | 6 + ...tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv | 6 - ...cb1-tcj1-tmul8-wl4294967296-cs16777216.csv | 6 - ...tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv | 6 + ...cb1-tcj1-tmul8-wl4294967296-cs16777216.csv | 6 - ...tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv | 1 + 7 files changed, 2023 insertions(+), 612 deletions(-) create mode 100644 qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul16-wl4294967296-cs2097152.csv delete mode 100644 qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv delete mode 100644 qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv create mode 100644 qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv delete mode 100644 qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv create mode 100644 qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv diff --git a/qdp_project/evaluation-results/perf.svg b/qdp_project/evaluation-results/perf.svg index 3b1d655..1d8cd77 100644 --- a/qdp_project/evaluation-results/perf.svg +++ b/qdp_project/evaluation-results/perf.svg @@ -430,1192 +430,2608 @@ -futex_wait_queue (1,739,970 samples, 0.01%) - +__count_memcg_events (3,821,882 samples, 0.02%) + -clear_huge_page (327,031,017 samples, 2.43%) -cl.. +update_process_times (5,146,731 samples, 0.03%) + -__mmu_notifier_invalidate_range_end (4,315,626 samples, 0.03%) - +__sysvec_apic_timer_interrupt (7,884,915 samples, 0.05%) + -std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::clear (5,177,974 samples, 0.04%) - +dml::core::hardware_device::submit (12,186,558 samples, 0.07%) + -perf_iterate_ctx (1,724,604 samples, 0.01%) - +unmap_page_range (12,974,723 samples, 0.08%) + -void caching<1ul> (40,431,525 samples, 0.30%) - +__mem_cgroup_charge (12,102,933 samples, 0.07%) + -__mmu_notifier_invalidate_range_end (6,744,844 samples, 0.05%) - +__strstr_avx512 (2,478,575 samples, 0.02%) + -_mid_memalign (25,295,540 samples, 0.19%) - +Aggregation<unsigned long, Sum, (2,644,773,911 samples, 16.19%) +Aggregation<unsigned lon.. -std::__tree_barrier<NopStruct>::wait (1,739,970 samples, 0.01%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (571,144,167 samples, 3.50%) +uns.. -__GI_munmap (1,679,595 samples, 0.01%) - +perf_event_mmap_output (1,530,993 samples, 0.01%) + -_int_memalign (25,295,540 samples, 0.19%) - +exc_page_fault (1,830,044,679 samples, 11.20%) +exc_page_fault -std::chrono::_V2::steady_clock::now (1,691,962,679 samples, 12.55%) -std::chrono::_V2::.. +sysmalloc (21,283,512 samples, 0.13%) + -exc_page_fault (1,819,294,773 samples, 13.50%) -exc_page_fault +free_unref_page_prepare (1,730,235 samples, 0.01%) + -free_unref_page (12,087,054 samples, 0.09%) - +kmem_cache_alloc (1,721,694 samples, 0.01%) + -dml::core::dispatcher::hw_dispatcher::~hw_dispatcher (1,679,595 samples, 0.01%) - +accfg_get_param_long (1,490,139 samples, 0.01%) + -__sysvec_apic_timer_interrupt (2,568,736 samples, 0.02%) - +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (44,228,686 samples, 0.27%) + -perf_event_mmap (1,724,604 samples, 0.01%) - +folio_add_new_anon_rmap (3,195,706 samples, 0.02%) + -advise_stack_range (1,739,797 samples, 0.01%) - +vm_area_alloc (3,054,209 samples, 0.02%) + -perf_adjust_freq_unthr_context (4,299,293 samples, 0.03%) - +update_process_times (2,170,601 samples, 0.01%) + -qi_flush_dev_iotlb_pasid (1,726,916 samples, 0.01%) - +__x64_sys_munmap (1,728,623 samples, 0.01%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (2,642,523,934 samples, 19.61%) -unsigned long std::uniform_int.. +page_counter_try_charge (2,596,328 samples, 0.02%) + -grow_heap (25,295,540 samples, 0.19%) - +vma_alloc_folio (12,924,242 samples, 0.08%) + -std::__detail::__waiter_pool::_M_do_wait (1,739,970 samples, 0.01%) - +auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (57,276,083 samples, 0.35%) + -qi_submit_sync (2,588,710 samples, 0.02%) - +std::thread::thread<void (3,409,299 samples, 0.02%) + -handle_mm_fault (6,022,765 samples, 0.04%) - +ksys_read (1,524,427 samples, 0.01%) + -__folio_alloc (2,574,263 samples, 0.02%) - +all (16,337,436,059 samples, 100%) + + + +perf_adjust_freq_unthr_context (5,492,973 samples, 0.03%) + + + +unmap_vmas (1,728,977 samples, 0.01%) + + + +change_protection (19,560,452 samples, 0.12%) + + + +__folio_alloc (1,479,835,396 samples, 9.06%) +__folio_alloc + + +__x64_sys_mprotect (20,421,763 samples, 0.12%) + + + +perf_adjust_freq_unthr_context (1,541,693 samples, 0.01%) + + + +path_openat (11,335,071 samples, 0.07%) + + + +__GI___fstatat64 (2,319,458 samples, 0.01%) + + + +perf_event_task_tick (1,626,903 samples, 0.01%) + + + +irqentry_exit_to_user_mode (1,731,095 samples, 0.01%) + + + +node_read_meminfo (14,183,508 samples, 0.09%) + + + +openat (3,078,371 samples, 0.02%) + + + +asm_sysvec_apic_timer_interrupt (4,508,335 samples, 0.03%) + + + +try_charge_memcg (4,050,786 samples, 0.02%) + + + +tick_sched_timer (7,126,171 samples, 0.04%) + + + +free_unref_page_list (6,056,971 samples, 0.04%) + + + +exit_to_user_mode_prepare (2,049,407 samples, 0.01%) + + + +folio_add_lru (6,122,334 samples, 0.04%) + + + +asm_sysvec_apic_timer_interrupt (5,156,974 samples, 0.03%) + + + +folio_add_new_anon_rmap (3,082,256 samples, 0.02%) + + + +_mm512_mask_add_epi64 (757,239,970 samples, 4.63%) +_mm51.. + + +exit_to_user_mode_prepare (5,918,508 samples, 0.04%) + + + +__rmqueue_pcplist (2,876,947 samples, 0.02%) + + + +std::thread& std::vector<std::thread, std::allocator<std::thread> >::emplace_back<void (3,409,299 samples, 0.02%) + + + +allocate_stack (3,409,299 samples, 0.02%) + + + +qi_flush_dev_iotlb_pasid (8,586,262 samples, 0.05%) + + + +update_process_times (2,777,240 samples, 0.02%) + + + +tick_sched_timer (1,460,612 samples, 0.01%) + + + +dsacache::CacheData::WaitOnCompletion (1,883,293 samples, 0.01%) + + + +allocate_fake_cpuc (3,459,982 samples, 0.02%) + + + +__hrtimer_run_queues (7,126,171 samples, 0.04%) + + + +internal_get_user_pages_fast (2,228,742 samples, 0.01%) + + + +perf_event_task_tick (5,146,731 samples, 0.03%) + + + +kmalloc_node_trace (1,729,995 samples, 0.01%) + + + +__x64_sys_madvise (9,653,225 samples, 0.06%) + + + +lru_add_fn (2,023,224 samples, 0.01%) + + + +__sysfs_device_parse (1,490,139 samples, 0.01%) + + + +qi_submit_sync (32,864,156 samples, 0.20%) + + + +__x64_sys_get_mempolicy (4,556,948 samples, 0.03%) + + + +qi_submit_sync (42,381,487 samples, 0.26%) + + + +__GI___close_nocancel (6,690,044 samples, 0.04%) + + + +do_syscall_64 (38,464,959 samples, 0.24%) + + + +vma_alloc_folio (3,430,412 samples, 0.02%) + + + +get_page_from_freelist (1,715,406 samples, 0.01%) + + + +void std::vector<int, std::allocator<int> >::_M_range_initialize<int const*> (3,431,863 samples, 0.02%) + + + +vma_prepare (2,582,768 samples, 0.02%) + + + +__alloc_pages (7,939,545 samples, 0.05%) + + + +irqentry_exit_to_user_mode (2,049,407 samples, 0.01%) + + + +vscnprintf (13,571,657 samples, 0.08%) + + + +__GI___mmap64 (50,596,001 samples, 0.31%) + + + +hrtimer_interrupt (7,666,902 samples, 0.05%) + + + +__alloc_pages (1,479,835,396 samples, 9.06%) +__alloc_pages + + +pte_alloc_one (6,060,859 samples, 0.04%) + + + +get_page_from_freelist (1,694,345 samples, 0.01%) + + + +QDPBench (16,337,436,058 samples, 100.00%) +QDPBench + + +charge_memcg (4,050,786 samples, 0.02%) + + + +__libc_start_main_impl (8,541,764,204 samples, 52.28%) +__libc_start_main_impl + + +__GI__IO_doallocbuf (25,072,445 samples, 0.15%) + + + +__kmalloc_node (2,093,173 samples, 0.01%) + + + +charge_memcg (1,722,903 samples, 0.01%) + + + +std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::_Scoped_node::_Scoped_node<unsigned char*, dsacache::CacheData&> (2,231,659 samples, 0.01%) + + + +kernel_get_mempolicy (4,556,948 samples, 0.03%) + + + +charge_memcg (2,598,122 samples, 0.02%) + + + +mbind (5,974,528 samples, 0.04%) + + + +folio_lruvec_lock_irqsave (2,414,711 samples, 0.01%) + + + +__GI_munmap (1,728,623 samples, 0.01%) + + + +perf_event_init_task (19,899,347 samples, 0.12%) + + + +Vector_Loader<unsigned long, (384,094,614 samples, 2.35%) +V.. + + +entry_SYSCALL_64_after_hwframe (1,524,427 samples, 0.01%) + + + +__alloc_file (3,366,665 samples, 0.02%) + + + +scheduler_tick (6,579,035 samples, 0.04%) + + + +std::thread::_M_start_thread (3,409,299 samples, 0.02%) + + + +handle_mm_fault (2,542,727,303 samples, 15.56%) +handle_mm_fault + + +[anon] (2,347,633 samples, 0.01%) + + + +do_user_addr_fault (2,547,045,986 samples, 15.59%) +do_user_addr_fault + + +main (8,539,248,506 samples, 52.27%) +main + + +__GI_munmap (260,351,057 samples, 1.59%) + + + +get_page_from_freelist (3,430,412 samples, 0.02%) + + + +try_charge_memcg (2,357,402 samples, 0.01%) + + + +__mmu_notifier_invalidate_range_end (78,711,041 samples, 0.48%) + + + +lru_add_fn (3,223,510 samples, 0.02%) + + + +__GI___libc_read (21,636,096 samples, 0.13%) + + + +asm_exc_page_fault (3,409,299 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (20,421,763 samples, 0.12%) + + + +do_syscall_64 (19,250,418 samples, 0.12%) + + + +[unknown] (4,568,590 samples, 0.03%) + + + +__vm_munmap (259,486,254 samples, 1.59%) + + + +_raw_spin_lock (6,009,917 samples, 0.04%) + + + +__mod_lruvec_page_state (3,035,337 samples, 0.02%) + + + +__x64_sys_munmap (259,486,254 samples, 1.59%) + + + +clear_huge_page (818,171,051 samples, 5.01%) +clear_.. + + +devices_init (1,487,221 samples, 0.01%) + + + +__kmem_cache_alloc_node (1,729,995 samples, 0.01%) + + + +syscall_exit_to_user_mode (5,918,508 samples, 0.04%) + + + +sysvec_apic_timer_interrupt (2,777,240 samples, 0.02%) + + + +sysvec_apic_timer_interrupt (8,062,003 samples, 0.05%) + + + +__hrtimer_run_queues (5,146,731 samples, 0.03%) + + + +hrtimer_interrupt (7,126,171 samples, 0.04%) + + + +pte_alloc_one (12,504,536 samples, 0.08%) + + + +mas_wr_modify (1,728,362 samples, 0.01%) + + + +vma_merge (5,204,666 samples, 0.03%) + + + +vm_mmap_pgoff (50,596,001 samples, 0.31%) + + + +accfg_get_param_long (4,602,798 samples, 0.03%) + + + +scan_a (1,678,013,450 samples, 10.27%) +scan_a + + +_raw_spin_lock (2,039,788 samples, 0.01%) + + + +exit_to_user_mode_prepare (1,731,095 samples, 0.01%) + + + +syscall (4,556,948 samples, 0.03%) + + + +do_syscall_64 (2,565,568 samples, 0.02%) + + + +do_syscall_64 (50,596,001 samples, 0.31%) + + + +__sysvec_apic_timer_interrupt (1,541,693 samples, 0.01%) + + + +unmap_region (1,728,623 samples, 0.01%) + + + +__rmqueue_pcplist (12,992,411 samples, 0.08%) + + + +__hrtimer_run_queues (2,714,507 samples, 0.02%) + + + +intel_invalidate_range (78,711,041 samples, 0.48%) + + + +perf_iterate_ctx (32,825,072 samples, 0.20%) + + + +__split_vma (2,597,797 samples, 0.02%) + + + +count_memcg_events.constprop.0 (3,821,882 samples, 0.02%) + + + +do_anonymous_page (2,419,571 samples, 0.01%) + + + +alloc_fd (2,581,399 samples, 0.02%) + + + +walk_component (2,915,734 samples, 0.02%) + + + +asm_exc_page_fault (1,731,639 samples, 0.01%) + + + +get_page_from_freelist (1,732,237 samples, 0.01%) + + + +qi_submit_sync (7,061,195 samples, 0.04%) + + + +numa_node_to_cpus (2,831,897 samples, 0.02%) + + + +_raw_spin_lock_irqsave (1,505,920 samples, 0.01%) + + + +std::pair<std::__detail::_Node_iterator<std::pair<unsigned char* const, dsacache::CacheData>, false, false>, bool> std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::emplace<unsigned char*, dsacache::CacheData&> (2,231,659 samples, 0.01%) + + + +void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > (262,080,827 samples, 1.60%) + + + +sysvec_apic_timer_interrupt (2,724,344 samples, 0.02%) + + + +__hrtimer_run_queues (1,460,612 samples, 0.01%) + + + +clear_page_erms (309,607,106 samples, 1.90%) +c.. + + +exc_page_fault (2,547,614,283 samples, 15.59%) +exc_page_fault + + +dsacache::CacheData::Deallocate (260,351,057 samples, 1.59%) + + + +__GI__IO_doallocbuf (25,072,445 samples, 0.15%) + + + +add_wq (8,186,564 samples, 0.05%) + + + +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_nodes (262,080,827 samples, 1.60%) + + + +do_sys_openat2 (18,616,272 samples, 0.11%) + + + +do_syscall_64 (259,486,254 samples, 1.59%) + + + +__handle_mm_fault (2,542,611 samples, 0.02%) + + + +read (1,524,427 samples, 0.01%) + + + +_int_memalign (44,228,686 samples, 0.27%) + + + +vma_alloc_folio (1,640,530,564 samples, 10.04%) +vma_alloc_folio + + +__cond_resched (7,615,401 samples, 0.05%) + + + +sysmalloc (42,505,783 samples, 0.26%) + + + +kmem_cache_alloc_bulk (1,585,774 samples, 0.01%) + + + +kernel_mbind (5,974,528 samples, 0.04%) + + + +__handle_mm_fault (2,537,101,790 samples, 15.53%) +__handle_mm_fault + + +clear_page_erms (1,231,860,598 samples, 7.54%) +clear_page.. + + +qi_flush_dev_iotlb_pasid (10,837,698 samples, 0.07%) + + + +mas_prev_nentry (1,563,935 samples, 0.01%) + + + +decltype (3,409,299 samples, 0.02%) + + + +check_preemption_disabled (1,676,042 samples, 0.01%) + + + +do_mmap (50,596,001 samples, 0.31%) + + + +__alloc_pages (3,430,412 samples, 0.02%) + + + +vfs_fstatat (1,512,504 samples, 0.01%) + + + +__GI___mmap64 (50,596,001 samples, 0.31%) + + + +__GI_munmap (18,146,766 samples, 0.11%) + + + +folio_add_lru (7,530,893 samples, 0.05%) + + + +intel_cpuc_prepare (1,729,995 samples, 0.01%) + + + +do_syscall_64 (1,524,427 samples, 0.01%) + + + +do_syscall_64 (20,421,763 samples, 0.12%) + + + +entry_SYSCALL_64_after_hwframe (4,556,948 samples, 0.03%) + + + +perf_iterate_sb.constprop.0 (32,825,072 samples, 0.20%) + + + +sum_check (3,317,095,812 samples, 20.30%) +sum_check + + +std::pair<std::__detail::_Node_iterator<std::pair<unsigned char* const, dsacache::CacheData>, false, false>, bool> std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::_M_emplace<unsigned char*, dsacache::CacheData&> (2,231,659 samples, 0.01%) + + + +tlb_finish_mmu (161,743,813 samples, 0.99%) + + + +__next_zones_zonelist (3,096,950 samples, 0.02%) + + + +memcg_check_events (4,262,574 samples, 0.03%) + + + +kmalloc_trace (1,729,987 samples, 0.01%) + + + +sysvec_apic_timer_interrupt (2,163,378 samples, 0.01%) + + + +dsacache::Cache::Access (2,844,259,210 samples, 17.41%) +dsacache::Cache::Access + + +qi_flush_dev_iotlb_pasid (7,061,195 samples, 0.04%) + + + +clear_huge_page (2,583,148 samples, 0.02%) + + + +copy_process (22,478,709 samples, 0.14%) + + + +do_syscall_64 (18,146,766 samples, 0.11%) + + + +dsacache::Cache::GetFromCache (2,051,374 samples, 0.01%) + + + +do_sys_openat2 (2,565,568 samples, 0.02%) + + + +_raw_spin_lock (2,581,399 samples, 0.02%) + + + +sysfs_kf_seq_show (14,183,508 samples, 0.09%) + + + +format_decode (3,780,724 samples, 0.02%) + + + +__mmu_notifier_invalidate_range_end (26,909,047 samples, 0.16%) + + + +_IO_new_fclose (7,527,675 samples, 0.05%) + + + +asm_exc_page_fault (1,722,903 samples, 0.01%) + + + +__mem_cgroup_charge (1,722,903 samples, 0.01%) + + + +__folio_alloc (1,694,345 samples, 0.01%) + + + +_mm512_mask_testn_epi8_mask (1,617,669 samples, 0.01%) + + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (446,356,922 samples, 2.73%) +st.. + + +dml::detail::ml::impl::hardware::submit (12,186,558 samples, 0.07%) + + + +vfs_read (1,524,427 samples, 0.01%) + + + +aggr_j (2,660,666,540 samples, 16.29%) +aggr_j + + +clear_page_erms (1,325,383,434 samples, 8.11%) +clear_page_.. + + +dsacache::Cache::ExecuteCopy (57,276,083 samples, 0.35%) + + + +qi_flush_piotlb (16,071,349 samples, 0.10%) + + + +__handle_mm_fault (1,722,903 samples, 0.01%) + + + +accfg_get_param_str (3,070,305 samples, 0.02%) + + + +scheduler_tick (2,170,601 samples, 0.01%) + + + +preempt_count_add (1,462,822 samples, 0.01%) + + + +do_mprotect_pkey (38,464,959 samples, 0.24%) + + + +device_parse (8,186,564 samples, 0.05%) + + + +numa_bitmask_clearall (2,584,213 samples, 0.02%) + + + +scheduler_tick (2,777,240 samples, 0.02%) + + + +mas_prev (1,563,935 samples, 0.01%) + + + +syscall_exit_to_user_mode (2,109,428 samples, 0.01%) + + + +do_syscall_64 (4,556,948 samples, 0.03%) + + + +do_mbind (5,974,528 samples, 0.04%) + + + +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (571,144,167 samples, 3.50%) +uns.. + + +exc_page_fault (3,280,828 samples, 0.02%) + + + +handle_mm_fault (6,013,560 samples, 0.04%) + + + +mem_cgroup_charge_statistics (1,722,903 samples, 0.01%) + + + +hrtimer_interrupt (5,146,731 samples, 0.03%) + + + +__memcpy (3,104,328 samples, 0.02%) + + + +do_user_addr_fault (3,280,828 samples, 0.02%) + + + +dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (44,228,686 samples, 0.27%) + + + +perf_event_alloc (15,574,048 samples, 0.10%) + + + +__GI___libc_malloc (22,752,987 samples, 0.14%) + + + +mas_alloc_nodes (1,585,774 samples, 0.01%) + + + +do_huge_pmd_anonymous_page (2,494,709,541 samples, 15.27%) +do_huge_pmd_anonymous_p.. + + +scheduler_tick (5,146,731 samples, 0.03%) + + + +mod_memcg_state (2,422,137 samples, 0.01%) + + + +syscall (2,285,719 samples, 0.01%) + + + +free_tail_page_prepare (12,096,311 samples, 0.07%) + + + +tick_sched_handle (1,541,693 samples, 0.01%) + + + +__alloc_pages (1,694,345 samples, 0.01%) + + + +__GI___libc_read (21,636,096 samples, 0.13%) + + + +do_vmi_munmap (18,146,766 samples, 0.11%) + + + +__mmu_notifier_invalidate_range_end (9,653,225 samples, 0.06%) + + + +__folio_alloc (11,611,654 samples, 0.07%) + + + +Sum<unsigned long>::simd_agg (757,239,970 samples, 4.63%) +Sum<u.. -dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (25,295,540 samples, 0.19%) - +vfs_read (19,372,267 samples, 0.12%) + -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (959,605,276 samples, 7.12%) -std::mers.. +__GI_exit (2,515,698 samples, 0.02%) + -do_huge_pmd_anonymous_page (4,311,772 samples, 0.03%) - +tick_sched_handle (1,460,612 samples, 0.01%) + -grow_heap (6,744,844 samples, 0.05%) - +inherit_task_group.isra.0 (19,899,347 samples, 0.12%) + -Filter<unsigned long, LT, (5,411,903,209 samples, 40.15%) -Filter<unsigned long, LT, +do_anonymous_page (38,255,303 samples, 0.23%) + -vma_alloc_folio (2,574,263 samples, 0.02%) - +mem_cgroup_charge_statistics (3,814,030 samples, 0.02%) + -__mmu_notifier_invalidate_range_end (23,570,936 samples, 0.17%) - +__GI___libc_read (1,524,427 samples, 0.01%) + -__GI_munmap (17,268,198 samples, 0.13%) - +entry_SYSCALL_64_after_hwframe (7,728,229 samples, 0.05%) + -entry_SYSCALL_64_after_hwframe (2,824,555 samples, 0.02%) - +entry_SYSCALL_64_after_hwframe (1,728,623 samples, 0.01%) + -sysvec_apic_timer_interrupt (6,036,217 samples, 0.04%) - +perf_iterate_ctx (6,351,246 samples, 0.04%) + -dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (31,098,656 samples, 0.23%) - +get_page_from_freelist (1,638,023,280 samples, 10.03%) +get_page_from_.. -syscall (1,739,970 samples, 0.01%) - +do_madvise (9,653,225 samples, 0.06%) + -_raw_spin_lock_irqsave (1,574,388 samples, 0.01%) - +accfg_device_get_first (1,487,221 samples, 0.01%) + -do_syscall_64 (2,824,555 samples, 0.02%) - +get_page_from_freelist (1,479,835,396 samples, 9.06%) +get_page_from.. -auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (31,098,656 samples, 0.23%) - +x86_pmu_event_init (6,918,889 samples, 0.04%) + -dsacache::Cache::Access (40,431,525 samples, 0.30%) - +do_user_addr_fault (1,830,044,679 samples, 11.20%) +do_user_addr_fault -unsigned long std::uniform_int_distribution<unsigned long>::operator (2,636,449,788 samples, 19.56%) -unsigned long std::uniform_int.. +__mmu_notifier_invalidate_range (1,729,045 samples, 0.01%) + -asm_exc_page_fault (1,821,899,562 samples, 13.52%) -asm_exc_page_fault +update_load_avg (1,445,866 samples, 0.01%) + -__GI__IO_file_doallocate (7,607,464 samples, 0.06%) - +exc_page_fault (1,722,903 samples, 0.01%) + -sync_regs (2,604,789 samples, 0.02%) - +memcg_slab_post_alloc_hook (1,486,139 samples, 0.01%) + -qi_flush_piotlb (10,915,702 samples, 0.08%) - +__mod_zone_page_state (1,731,117 samples, 0.01%) + -qi_submit_sync (1,726,916 samples, 0.01%) - +mbind_range (3,444,002 samples, 0.02%) + -std::barrier<NopStruct>::arrive_and_wait (1,739,970 samples, 0.01%) - +__fput (5,918,508 samples, 0.04%) + -try_charge_memcg (1,735,920 samples, 0.01%) - +update_process_times (1,460,612 samples, 0.01%) + -dsacache::Cache::AllocOnNode (9,332,869 samples, 0.07%) - +entry_SYSCALL_64_after_hwframe (5,974,528 samples, 0.04%) + -perf_event_task_tick (20,760,710 samples, 0.15%) - +do_vmi_align_munmap (259,486,254 samples, 1.59%) + -perf_event_task_tick (4,299,293 samples, 0.03%) - +__list_del_entry_valid (1,456,703 samples, 0.01%) + -__x64_sys_munmap (1,679,595 samples, 0.01%) - +__mmu_notifier_invalidate_range (76,977,428 samples, 0.47%) + -copy_process (3,468,037 samples, 0.03%) - +vsnprintf (13,571,657 samples, 0.08%) + -std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<std::chrono::_V2::steady_clock, std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > > (19,094,643 samples, 0.14%) - +void fill_mt<unsigned long> (4,936,796,643 samples, 30.22%) +void fill_mt<unsigned long> -__GI_madvise (1,739,797 samples, 0.01%) - +unmap_region (254,294,478 samples, 1.56%) + -unmap_region (4,315,626 samples, 0.03%) - +qi_submit_sync (36,329,554 samples, 0.22%) + -dml::core::dispatcher::hw_dispatcher::hw_dispatcher (4,958,906 samples, 0.04%) - +kmem_cache_alloc (3,054,209 samples, 0.02%) + -zap_huge_pmd (2,590,090 samples, 0.02%) - +asm_exc_page_fault (2,590,206,895 samples, 15.85%) +asm_exc_page_fault -std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_nodes (5,177,974 samples, 0.04%) - +operator new (44,228,686 samples, 0.27%) + -void std::__detail::__waiter<std::integral_constant<bool, true> >::_M_do_wait<std::__tree_barrier<NopStruct>::wait (1,739,970 samples, 0.01%) - +__memcg_kmem_charge_page (6,224,139 samples, 0.04%) + -std::chrono::_V2::steady_clock::now (1,714,183,943 samples, 12.72%) -std::chrono::_V2::s.. +__do_sys_newfstatat (1,512,504 samples, 0.01%) + -dsacache::Cache::SubmitTask (40,431,525 samples, 0.30%) - +grow_heap (20,421,763 samples, 0.12%) + -__GI__IO_doallocbuf (7,607,464 samples, 0.06%) - +perf_iterate_sb.constprop.0 (6,351,246 samples, 0.04%) + -tick_sched_timer (5,167,785 samples, 0.04%) - +mas_wr_node_store (1,728,362 samples, 0.01%) + -__memcg_kmem_charge_page (2,603,722 samples, 0.02%) - +devices_init (2,466,668 samples, 0.02%) + -perf_iterate_sb.constprop.0 (1,724,604 samples, 0.01%) - +page_remove_rmap (1,729,498 samples, 0.01%) + -entry_SYSCALL_64_after_hwframe (1,739,970 samples, 0.01%) - +__sysvec_apic_timer_interrupt (2,714,507 samples, 0.02%) + -__list_del_entry_valid (1,735,652 samples, 0.01%) - +clear_page_erms (778,310,223 samples, 4.76%) +clear.. -do_syscall_64 (1,739,970 samples, 0.01%) - +do_filp_open (1,537,896 samples, 0.01%) + -scheduler_tick (1,704,757 samples, 0.01%) - +__GI_mprotect (39,224,955 samples, 0.24%) + -perf_event_task_tick (1,704,757 samples, 0.01%) - +__strncasecmp_l_evex (2,453,327 samples, 0.02%) + -__GI___getdelim (8,469,848 samples, 0.06%) - +__vm_munmap (18,146,766 samples, 0.11%) + -do_huge_pmd_anonymous_page (1,816,689,561 samples, 13.48%) -do_huge_pmd_anonymou.. +sysvec_apic_timer_interrupt (5,156,974 samples, 0.03%) + -dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (25,295,540 samples, 0.19%) - +perf_adjust_freq_unthr_context (5,146,731 samples, 0.03%) + -perf_event_init_task (2,599,737 samples, 0.02%) - +update_process_times (1,541,693 samples, 0.01%) + -qi_submit_sync (1,679,595 samples, 0.01%) - +internal_get_user_pages_fast (2,670,813 samples, 0.02%) + -__GI_exit (1,679,595 samples, 0.01%) - +dml::core::dispatcher::hw_dispatcher::~hw_dispatcher (1,728,623 samples, 0.01%) + -tick_sched_handle (20,760,710 samples, 0.15%) - +sync_regs (12,011,565 samples, 0.07%) + -[[vdso]] (1,255,679,649 samples, 9.32%) -[[vdso]] +qi_flush_piotlb (10,974,190 samples, 0.07%) + -update_process_times (2,568,736 samples, 0.02%) - +__x64_sys_openat (18,616,272 samples, 0.11%) + -asm_sysvec_apic_timer_interrupt (2,568,736 samples, 0.02%) - +do_dentry_open (2,033,850 samples, 0.01%) + -do_syscall_64 (3,468,037 samples, 0.03%) - +dml::core::dispatcher::hw_queue::initialize_new_queue (1,537,469 samples, 0.01%) + -get_page_from_freelist (2,574,263 samples, 0.02%) - +task_tick_fair (1,445,866 samples, 0.01%) + -clear_page_erms (1,737,509 samples, 0.01%) - +memcg_account_kmem (2,422,137 samples, 0.01%) + -do_vmi_align_munmap (1,679,595 samples, 0.01%) - +__GI___libc_malloc (2,231,659 samples, 0.01%) + -do_user_addr_fault (6,888,173 samples, 0.05%) - +__fopen_internal (20,850,721 samples, 0.13%) + -std::common_type<std::chrono::duration<long, std::ratio<1l, 1000000000l> >, std::chrono::duration<long, std::ratio<1l, 1000000000l> > >::type std::chrono::operator-<long, std::ratio<1l, 1000000000l>, long, std::ratio<1l, 1000000000l> > (19,094,643 samples, 0.14%) - +__sysvec_apic_timer_interrupt (5,146,731 samples, 0.03%) + -sysvec_apic_timer_interrupt (20,760,710 samples, 0.15%) - +dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (57,276,083 samples, 0.35%) + -unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (1,677,102,236 samples, 12.44%) -unsigned int std::.. +do_vmi_munmap (1,728,623 samples, 0.01%) + -tick_sched_timer (2,573,741 samples, 0.02%) - +syscall (5,974,528 samples, 0.04%) + -entry_SYSCALL_64_after_hwframe (1,739,797 samples, 0.01%) - +kernel_get_mempolicy (5,075,534 samples, 0.03%) + -do_filp_open (2,824,555 samples, 0.02%) - +__kmem_cache_alloc_node (2,093,173 samples, 0.01%) + -unmap_region (17,268,198 samples, 0.13%) - +mtree_load (1,843,021 samples, 0.01%) + -std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (25,295,540 samples, 0.19%) - +arch_get_unmapped_area_topdown (1,563,935 samples, 0.01%) + -change_protection (23,570,936 samples, 0.17%) - +mod_objcg_state (1,543,680 samples, 0.01%) + -intel_invalidate_range (4,315,626 samples, 0.03%) - +do_syscall_64 (9,653,225 samples, 0.06%) + -free_tail_page_prepare (9,496,754 samples, 0.07%) - +tick_sched_handle (5,146,731 samples, 0.03%) + -asm_exc_page_fault (6,888,173 samples, 0.05%) - +__GI___libc_read (1,524,427 samples, 0.01%) + -folio_add_lru (1,574,388 samples, 0.01%) - +__GI___libc_malloc (3,431,863 samples, 0.02%) + -update_process_times (5,167,785 samples, 0.04%) - +__GI___getdelim (49,290,423 samples, 0.30%) + -__run_exit_handlers (1,679,595 samples, 0.01%) - +__libc_open64 (19,250,418 samples, 0.12%) + -Sum<unsigned long>::simd_agg (3,349,026 samples, 0.02%) - +mmap_region (48,171,543 samples, 0.29%) + -schedule (1,739,970 samples, 0.01%) - +__libc_start_call_main (8,541,764,204 samples, 52.28%) +__libc_start_call_main -qi_flush_piotlb (2,588,710 samples, 0.02%) - +dml::detail::ml::task<std::allocator<unsigned char> >::task (44,228,686 samples, 0.27%) + -[[vdso]] (1,207,632,714 samples, 8.96%) -[[vdso]] +perf_try_init_event (6,918,889 samples, 0.04%) + -dsacache::Cache::ExecuteCopy (31,098,656 samples, 0.23%) - +handle_mm_fault (3,280,828 samples, 0.02%) + -__libc_openat64 (2,824,555 samples, 0.02%) - +__strcasestr (4,045,089 samples, 0.02%) + -hrtimer_interrupt (2,573,741 samples, 0.02%) - +__x64_sys_mprotect (38,464,959 samples, 0.24%) + -__GI__IO_doallocbuf (7,607,464 samples, 0.06%) - +dsacache::Cache::AllocOnNode (2,768,859,594 samples, 16.95%) +dsacache::Cache::AllocOnNode -clone3 (7,578,221,982 samples, 56.23%) -clone3 +__hrtimer_run_queues (1,541,693 samples, 0.01%) + -mprotect_fixup (25,295,540 samples, 0.19%) - +numa_node_to_cpus (2,584,213 samples, 0.02%) + -__schedule (1,739,970 samples, 0.01%) - +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (44,228,686 samples, 0.27%) + -__vm_munmap (1,679,595 samples, 0.01%) - +charge_memcg (2,596,328 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (1,679,595 samples, 0.01%) - +entry_SYSCALL_64_after_hwframe (18,146,766 samples, 0.11%) + -__x64_sys_munmap (17,268,198 samples, 0.13%) - +kvfree_call_rcu (2,652,940 samples, 0.02%) + -QDPBench (13,478,052,093 samples, 100.00%) -QDPBench +unsigned long std::uniform_int_distribution<unsigned long>::operator (2,657,438,501 samples, 16.27%) +unsigned long std::unifor.. -__hrtimer_run_queues (2,573,741 samples, 0.02%) - +perf_adjust_freq_unthr_context (1,626,903 samples, 0.01%) + -__sysvec_apic_timer_interrupt (3,442,758 samples, 0.03%) - +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (1,671,517,566 samples, 10.23%) +unsigned int st.. -std::pair<unsigned char* const, dsacache::CacheData>::~pair (5,177,974 samples, 0.04%) - +lru_gen_add_folio (2,065,739 samples, 0.01%) + -__do_sys_clone3 (3,468,037 samples, 0.03%) - +krc_this_cpu_lock (1,791,831 samples, 0.01%) + -__alloc_pages (2,574,263 samples, 0.02%) - +perf_event_task_tick (5,671,208 samples, 0.03%) + -void std::__detail::__platform_wait<int> (1,739,970 samples, 0.01%) - +two_way_short_needle (3,184,226 samples, 0.02%) + -do_vmi_munmap (5,177,974 samples, 0.04%) - +do_anonymous_page (2,598,122 samples, 0.02%) + -__hrtimer_run_queues (5,167,785 samples, 0.04%) - +tick_sched_timer (2,714,507 samples, 0.02%) + -get_page_from_freelist (1,474,202,521 samples, 10.94%) -get_page_from_fr.. +scheduler_tick (1,541,693 samples, 0.01%) + -do_syscall_64 (6,744,844 samples, 0.05%) - +asm_sysvec_apic_timer_interrupt (1,415,984 samples, 0.01%) + -unmap_page_range (2,590,090 samples, 0.02%) - +hrtimer_interrupt (2,777,240 samples, 0.02%) + -do_anonymous_page (1,710,993 samples, 0.01%) - +check_preemption_disabled (1,713,789 samples, 0.01%) + -kernfs_fop_open (1,241,282 samples, 0.01%) - +__GI_madvise (9,653,225 samples, 0.06%) + -folio_batch_move_lru (1,574,388 samples, 0.01%) - +perf_event_task_tick (5,492,973 samples, 0.03%) + -unmap_region (1,679,595 samples, 0.01%) - +intel_invalidate_range (76,977,428 samples, 0.47%) + -clear_page_erms (2,574,263 samples, 0.02%) - +sysvec_apic_timer_interrupt (1,415,984 samples, 0.01%) + -asm_sysvec_apic_timer_interrupt (6,034,647 samples, 0.04%) - +unmap_vmas (1,728,623 samples, 0.01%) + -_start (5,290,426,542 samples, 39.25%) -_start +intel_cpuc_finish (1,730,138 samples, 0.01%) + -do_sys_openat2 (2,824,555 samples, 0.02%) - +down_read (1,551,814 samples, 0.01%) + -do_vmi_munmap (1,679,595 samples, 0.01%) - +hrtimer_interrupt (2,714,507 samples, 0.02%) + -dml::core::dispatcher::hw_dispatcher::get_instance (4,958,907 samples, 0.04%) - +down_write (2,822,695 samples, 0.02%) + -__sysvec_apic_timer_interrupt (20,760,710 samples, 0.15%) - +vma_alloc_folio (1,479,835,396 samples, 9.06%) +vma_alloc_folio -perf_adjust_freq_unthr_context (20,760,710 samples, 0.15%) - +release_pages (84,766,385 samples, 0.52%) + -sysmalloc (7,607,464 samples, 0.06%) - +_IO_new_file_close_it (6,690,044 samples, 0.04%) + -qi_flush_dev_iotlb_pasid (4,174,802 samples, 0.03%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (571,144,167 samples, 3.50%) +std.. -clock_gettime@plt (33,987,260 samples, 0.25%) - +qi_submit_sync (44,113,272 samples, 0.27%) + -add_wq (4,519,291 samples, 0.03%) - +std::__new_allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> >::allocate (2,231,659 samples, 0.01%) + -folio_lruvec_lock_irqsave (1,574,388 samples, 0.01%) - +__hrtimer_run_queues (2,777,240 samples, 0.02%) + -__GI___libc_malloc (7,607,464 samples, 0.06%) - +inode_permission (1,722,389 samples, 0.01%) + -do_dentry_open (1,241,282 samples, 0.01%) - +numa_node_of_cpu (4,248,901 samples, 0.03%) + -unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (608,239,162 samples, 4.51%) -unsig.. +clone3 (7,216,796,516 samples, 44.17%) +clone3 -dsacache::CacheData::~CacheData (5,177,974 samples, 0.04%) - +qi_submit_sync (10,837,698 samples, 0.07%) + -__handle_mm_fault (6,022,765 samples, 0.04%) - +__x64_sys_openat (2,565,568 samples, 0.02%) + -update_process_times (20,760,710 samples, 0.15%) - +folio_batch_move_lru (4,953,093 samples, 0.03%) + -sysmalloc (25,295,540 samples, 0.19%) - +hrtimer_interrupt (1,541,693 samples, 0.01%) + -__x64_sys_munmap (5,177,974 samples, 0.04%) - +__x64_sys_get_mempolicy (5,075,534 samples, 0.03%) + -unmap_vmas (4,315,626 samples, 0.03%) - +__GI_mprotect (20,421,763 samples, 0.12%) + -intel_invalidate_range (1,679,595 samples, 0.01%) - +dml::core::dispatcher::hw_dispatcher::get_instance (12,186,558 samples, 0.07%) + -scan_b (40,431,526 samples, 0.30%) - +__kmem_cache_free (2,404,641 samples, 0.01%) + -unmap_vmas (1,679,595 samples, 0.01%) - +do_user_addr_fault (6,013,560 samples, 0.04%) + -entry_SYSCALL_64_after_hwframe (5,177,974 samples, 0.04%) - +get_mem_cgroup_from_mm (3,711,864 samples, 0.02%) + -accfg_get_param_long (3,788,940 samples, 0.03%) - +tick_sched_timer (2,777,240 samples, 0.02%) + -std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (25,295,540 samples, 0.19%) - +path_openat (1,537,896 samples, 0.01%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (608,239,162 samples, 4.51%) -unsig.. +dml::core::dispatcher::hw_device::initialize_new_device (10,229,766 samples, 0.06%) + -unmap_vmas (2,590,090 samples, 0.02%) - +free_pcppages_bulk (4,326,961 samples, 0.03%) + -std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (25,295,540 samples, 0.19%) - +do_anonymous_page (1,722,903 samples, 0.01%) + -intel_invalidate_range (6,744,844 samples, 0.05%) - +do_user_addr_fault (1,722,903 samples, 0.01%) + -sysvec_apic_timer_interrupt (2,568,736 samples, 0.02%) - +_mid_memalign (44,228,686 samples, 0.27%) + -dml::core::hardware_device::submit (5,803,116 samples, 0.04%) - +__handle_mm_fault (6,013,560 samples, 0.04%) + -dml::detail::ml::task<std::allocator<unsigned char> >::task (25,295,540 samples, 0.19%) - +lru_gen_del_folio.constprop.0 (4,325,621 samples, 0.03%) + -dsacache::CacheData::WaitOnCompletion (5,177,974 samples, 0.04%) - +__mem_cgroup_charge (2,598,122 samples, 0.02%) + -do_syscall_64 (5,177,974 samples, 0.04%) - +dsacache::Cache::SubmitTask (12,186,558 samples, 0.07%) + -do_futex (1,739,970 samples, 0.01%) - +dsacache::Cache::GetCacheNode (8,805,849 samples, 0.05%) + -inherit_task_group.isra.0 (2,599,737 samples, 0.02%) - +vma_merge (3,444,002 samples, 0.02%) + -qi_submit_sync (4,174,802 samples, 0.03%) - +asm_exc_page_fault (3,280,828 samples, 0.02%) + -perf_event_alloc (1,734,813 samples, 0.01%) - +__hrtimer_run_queues (7,666,902 samples, 0.05%) + -__x64_sys_futex (1,739,970 samples, 0.01%) - +_int_malloc (44,228,686 samples, 0.27%) + -_int_malloc (7,607,464 samples, 0.06%) - +std::__new_allocator<int>::allocate (3,431,863 samples, 0.02%) + -do_user_addr_fault (1,819,294,773 samples, 13.50%) -do_user_addr_fault +mutex_unlock (1,578,372 samples, 0.01%) + -__rmqueue_pcplist (8,678,276 samples, 0.06%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (2,654,842,180 samples, 16.25%) +unsigned long std::unifor.. -__GI___clock_gettime (1,631,208,158 samples, 12.10%) -__GI___clock_gettime +vma_alloc_folio (1,559,530 samples, 0.01%) + -__vdso_clock_gettime (1,595,028,674 samples, 11.83%) -__vdso_clock_gett.. +_start (8,541,764,204 samples, 52.28%) +_start -entry_SYSCALL_64_after_hwframe (17,268,198 samples, 0.13%) - +__sysfs_device_parse (8,186,564 samples, 0.05%) + -__sysvec_apic_timer_interrupt (6,036,217 samples, 0.04%) - +__alloc_pages (1,638,884,491 samples, 10.03%) +__alloc_pages -dsacache::Cache::SubmitTask (5,803,116 samples, 0.04%) - +get_page_from_freelist (7,950,647 samples, 0.05%) + -hrtimer_interrupt (20,760,710 samples, 0.15%) - +qi_flush_piotlb (2,592,030 samples, 0.02%) + -_mm512_mask_add_epi64 (3,349,026 samples, 0.02%) - +do_syscall_64 (1,512,504 samples, 0.01%) + -__handle_mm_fault (1,818,426,587 samples, 13.49%) -__handle_mm_fault +qi_flush_dev_iotlb_pasid (36,329,554 samples, 0.22%) + -__x64_sys_openat (2,824,555 samples, 0.02%) - +do_huge_pmd_anonymous_page (6,013,560 samples, 0.04%) + -__libc_start_main_impl (5,290,426,542 samples, 39.25%) -__libc_start_main_impl +do_vmi_align_munmap (18,146,766 samples, 0.11%) + -scheduler_tick (4,299,293 samples, 0.03%) - +do_huge_pmd_anonymous_page (2,542,611 samples, 0.02%) + -do_vmi_align_munmap (5,177,974 samples, 0.04%) - +_raw_spin_lock (1,730,541 samples, 0.01%) + -tick_sched_handle (5,167,785 samples, 0.04%) - +entry_SYSCALL_64_after_hwframe (20,775,227 samples, 0.13%) + -asm_sysvec_apic_timer_interrupt (6,893,223 samples, 0.05%) - +do_syscall_64 (6,690,044 samples, 0.04%) + -tlb_finish_mmu (14,678,108 samples, 0.11%) - +dml::core::dispatcher::hw_dispatcher::hw_dispatcher (12,186,558 samples, 0.07%) + -void std::__atomic_wait_address<std::__barrier_phase_t, std::__tree_barrier<NopStruct>::wait (1,739,970 samples, 0.01%) - +update_process_times (7,126,171 samples, 0.04%) + -operator new (25,295,540 samples, 0.19%) - +dsacache::Cache::Access (14,009,326 samples, 0.09%) + -__x64_sys_mprotect (25,295,540 samples, 0.19%) - +__GI__IO_file_open (19,250,418 samples, 0.12%) + -do_vmi_align_munmap (17,268,198 samples, 0.13%) - +__mod_memcg_state (1,561,926 samples, 0.01%) + -__folio_alloc (1,475,070,703 samples, 10.94%) -__folio_alloc +std::allocator_traits<std::allocator<int> >::allocate (3,431,863 samples, 0.02%) + -__vm_munmap (5,177,974 samples, 0.04%) - +down_write (8,479,830 samples, 0.05%) + -__vdso_clock_gettime (1,597,729,157 samples, 11.85%) -__vdso_clock_gett.. +clear_page_erms (2,147,468 samples, 0.01%) + -handle_mm_fault (1,818,426,587 samples, 13.49%) -handle_mm_fault +scan_b (2,845,120,776 samples, 17.41%) +scan_b -start_thread (7,574,753,945 samples, 56.20%) -start_thread +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_node (262,080,827 samples, 1.60%) + -openat (2,824,555 samples, 0.02%) - +device_parse (1,490,139 samples, 0.01%) + -__GI_mprotect (25,295,540 samples, 0.19%) - +do_syscall_64 (23,342,525 samples, 0.14%) + -__GI_mprotect (6,744,844 samples, 0.05%) - +__mmu_notifier_invalidate_range_end (19,560,452 samples, 0.12%) + -scan_a (5,412,771,977 samples, 40.16%) -scan_a +tick_sched_handle (7,126,171 samples, 0.04%) + -auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (25,295,540 samples, 0.19%) - +do_filp_open (11,335,071 samples, 0.07%) + -tick_sched_handle (1,704,757 samples, 0.01%) - +std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::clear (262,080,827 samples, 1.60%) + -change_protection (6,744,844 samples, 0.05%) - +qi_submit_sync (10,974,190 samples, 0.07%) + -wqs_init (4,519,291 samples, 0.03%) - +__run_exit_handlers (2,515,698 samples, 0.02%) + -path_openat (2,824,555 samples, 0.02%) - +__pthread_create_2_1 (3,409,299 samples, 0.02%) + -do_syscall_64 (17,268,198 samples, 0.13%) - +std::allocator<int>::allocate (3,431,863 samples, 0.02%) + -all (13,478,052,094 samples, 100%) - +_IO_new_file_fopen (19,250,418 samples, 0.12%) + + + +kernfs_fop_open (2,033,850 samples, 0.01%) + + + +_raw_spin_lock_irqsave (2,594,701 samples, 0.02%) + + + +down_read (1,523,186 samples, 0.01%) + + + +task_mm_cid_work (1,731,095 samples, 0.01%) + + + +__mod_lruvec_page_state (1,732,325 samples, 0.01%) + + + +lock_vma_under_rcu (3,289,225 samples, 0.02%) + + + +dsacache::Cache::Clear (262,080,827 samples, 1.60%) + + + +numa_bitmask_clearall (2,295,921 samples, 0.01%) + + + +__mod_node_page_state (1,602,438 samples, 0.01%) + + + +std::pair<std::__detail::_Node_iterator<std::pair<unsigned char* const, dsacache::CacheData>, false, false>, bool> std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::emplace<unsigned char*, dsacache::CacheData&> (2,231,659 samples, 0.01%) + + + +__do_sys_clone3 (23,342,525 samples, 0.14%) + + + +mas_preallocate (1,585,774 samples, 0.01%) + + + +__handle_mm_fault (3,280,828 samples, 0.02%) + + + +clear_huge_page (327,751,872 samples, 2.01%) +c.. + + +zap_page_range_single (9,653,225 samples, 0.06%) + + + +__folio_alloc (1,559,530 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (19,250,418 samples, 0.12%) + + + +inherit_event.isra.0 (19,034,831 samples, 0.12%) + + + +[libstdc++.so.6.0.32] (7,183,800,766 samples, 43.97%) +[libstdc++.so.6.0.32] + + +std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::clear (262,080,827 samples, 1.60%) + + + +kernfs_fop_release (5,918,508 samples, 0.04%) + + + +void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::destroy<std::pair<unsigned char* const, dsacache::CacheData> > (262,080,827 samples, 1.60%) + + + +_raw_spin_lock_irqsave (2,414,711 samples, 0.01%) + + + +intel_invalidate_range (9,653,225 samples, 0.06%) + + + +get_unmapped_area (2,424,458 samples, 0.01%) + + + +numa_node_size64 (87,109,009 samples, 0.53%) + + + +lookup_fast (2,915,734 samples, 0.02%) + + + +mprotect_fixup (20,421,763 samples, 0.12%) + + + +tlb_finish_mmu (16,417,789 samples, 0.10%) + + + +_IO_new_file_underflow (46,708,541 samples, 0.29%) + + + +do_syscall_64 (5,974,528 samples, 0.04%) + + + +folio_batch_move_lru (4,437,935 samples, 0.03%) + + + +seq_read_iter (1,524,427 samples, 0.01%) + + + +try_charge_memcg (3,331,473 samples, 0.02%) + + + +asm_exc_page_fault (6,530,628 samples, 0.04%) + + + +void std::vector<std::thread, std::allocator<std::thread> >::_M_realloc_insert<void (3,409,299 samples, 0.02%) + + + +mod_lruvec_page_state.constprop.0 (3,035,337 samples, 0.02%) + + + +mod_lruvec_page_state.constprop.0 (1,732,325 samples, 0.01%) + + + +wqs_init (8,186,564 samples, 0.05%) + + + +uncharge_batch (2,593,230 samples, 0.02%) + + + +do_huge_pmd_anonymous_page (1,826,580,369 samples, 11.18%) +do_huge_pmd_anon.. + + +__kmem_cache_alloc_node (1,729,987 samples, 0.01%) + + + +__rmqueue_pcplist (42,564,946 samples, 0.26%) + + + +tick_sched_handle (2,170,601 samples, 0.01%) + + + +__mod_lruvec_page_state (2,643,773 samples, 0.02%) + + + +numa_alloc_onnode (58,089,727 samples, 0.36%) + + + +__free_one_page (4,326,961 samples, 0.03%) + + + +std::_Vector_base<int, std::allocator<int> >::_M_allocate (3,431,863 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (23,342,525 samples, 0.14%) + + + +__sysvec_apic_timer_interrupt (2,777,240 samples, 0.02%) + + + +__count_memcg_events (1,722,903 samples, 0.01%) + + + +std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::allocate (2,231,659 samples, 0.01%) + + + +tlb_batch_pages_flush (84,766,385 samples, 0.52%) + + + +intel_invalidate_range (1,729,045 samples, 0.01%) + + + +groups_init (1,490,139 samples, 0.01%) + + + +tick_sched_timer (5,146,731 samples, 0.03%) + + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (972,825,231 samples, 5.95%) +std::me.. + + +__GI__IO_file_doallocate (25,072,445 samples, 0.15%) + + + +_int_malloc (22,752,987 samples, 0.14%) + + + +free_unref_page (2,594,918 samples, 0.02%) + + + +numa_node_of_cpu (4,229,723 samples, 0.03%) + + + +ksys_read (20,070,864 samples, 0.12%) + + + +mprotect_fixup (38,464,959 samples, 0.24%) + + + +vma_prepare (2,822,695 samples, 0.02%) + + + +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (44,228,686 samples, 0.27%) + + + +debug_smp_processor_id (1,617,684 samples, 0.01%) + + + +asm_sysvec_apic_timer_interrupt (2,163,378 samples, 0.01%) + + + +operator new (2,231,659 samples, 0.01%) + + + +unmap_page_range (1,728,977 samples, 0.01%) + + + +advise_stack_range (9,653,225 samples, 0.06%) + + + +getname_flags.part.0 (3,101,514 samples, 0.02%) + + + +std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false>* std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_allocate_node<unsigned char*, dsacache::CacheData&> (2,231,659 samples, 0.01%) + + + +__vm_munmap (1,728,623 samples, 0.01%) + + + +page_counter_uncharge (1,727,997 samples, 0.01%) + + + +__alloc_pages (11,611,654 samples, 0.07%) + + + +page_counter_try_charge (4,050,786 samples, 0.02%) + + + +memcg_check_events (1,732,342 samples, 0.01%) + + + +free_unref_page_prepare (13,824,412 samples, 0.08%) + + + +void std::allocator_traits<std::allocator<std::thread> >::construct<std::thread, void (3,409,299 samples, 0.02%) + + + +perf_adjust_freq_unthr_context (5,671,208 samples, 0.03%) + + + +__alloc_pages (1,559,530 samples, 0.01%) + + + +link_path_walk.part.0.constprop.0 (5,389,979 samples, 0.03%) + + + +entry_SYSCALL_64_after_hwframe (1,512,504 samples, 0.01%) + + + +operator new (3,431,863 samples, 0.02%) + + + +__folio_alloc (3,430,412 samples, 0.02%) + + + +vm_unmapped_area (1,563,935 samples, 0.01%) + + + +do_syscall_64 (1,728,623 samples, 0.01%) + + + +__sysvec_apic_timer_interrupt (2,142,780 samples, 0.01%) + + + +qi_flush_piotlb (42,381,487 samples, 0.26%) + + + +get_unused_fd_flags (1,598,288 samples, 0.01%) + + + +start_thread (7,193,453,991 samples, 44.03%) +start_thread + + +__strcasestr (4,045,089 samples, 0.02%) + + + +__mod_zone_page_state (1,729,824 samples, 0.01%) + + + +alloc_empty_file (3,366,665 samples, 0.02%) + + + +__mod_lruvec_state (1,602,438 samples, 0.01%) + + + +try_charge_memcg (2,596,328 samples, 0.02%) + + + +__handle_mm_fault (1,830,044,679 samples, 11.20%) +__handle_mm_fault + + +dsacache::CacheData::~CacheData (262,080,827 samples, 1.60%) + + + +entry_SYSCALL_64_after_hwframe (259,486,254 samples, 1.59%) + + + +asm_exc_page_fault (2,285,719 samples, 0.01%) + + + +_raw_spin_lock (1,791,831 samples, 0.01%) + + + +perf_event_mmap (33,685,958 samples, 0.21%) + + + +free_compound_page (2,593,230 samples, 0.02%) + + + +CopyMethodPolicy (3,431,863 samples, 0.02%) + + + +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (44,228,686 samples, 0.27%) + + + +zap_huge_pmd (8,649,283 samples, 0.05%) + + + +dsacache::Cache::GetCacheNode (11,957,952 samples, 0.07%) + + + +check_preemption_disabled (3,692,528 samples, 0.02%) + + + +__memset (1,729,987 samples, 0.01%) + + + +perf_event_task_tick (1,541,693 samples, 0.01%) + + + +do_syscall_64 (7,728,229 samples, 0.05%) + + + +intel_invalidate_range (26,909,047 samples, 0.16%) + + + +task_work_run (1,731,095 samples, 0.01%) + + + +perf_adjust_freq_unthr_context (2,777,240 samples, 0.02%) + + + +free_unref_page_prepare (69,196,859 samples, 0.42%) + + + +vma_alloc_folio (1,694,345 samples, 0.01%) + + + +exc_page_fault (3,409,299 samples, 0.02%) + + + +hrtimer_interrupt (2,142,780 samples, 0.01%) + + + +get_page_from_freelist (1,559,530 samples, 0.01%) + + + +tick_sched_timer (1,541,693 samples, 0.01%) + + + +lru_gen_del_folio.constprop.0 (1,728,698 samples, 0.01%) + + + +__alloc_pages (2,597,639 samples, 0.02%) + + + +tick_sched_timer (7,666,902 samples, 0.05%) + + + +__folio_alloc (1,638,884,491 samples, 10.03%) +__folio_alloc + + +clear_page_erms (2,583,148 samples, 0.02%) + + + +kernfs_unlink_open_file (2,652,940 samples, 0.02%) + + + +down_write (2,582,768 samples, 0.02%) + -tick_sched_timer (20,760,710 samples, 0.15%) - +add_group (1,490,139 samples, 0.01%) + -__vm_munmap (17,268,198 samples, 0.13%) - +auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (44,228,686 samples, 0.27%) + -update_process_times (1,704,757 samples, 0.01%) - +fpregs_assert_state_consistent (1,569,121 samples, 0.01%) + -tick_sched_handle (2,568,736 samples, 0.02%) - +seq_read_iter (18,635,874 samples, 0.11%) + -void fill_mt<unsigned long> (4,915,410,540 samples, 36.47%) -void fill_mt<unsigned long> +dev_attr_show (14,183,508 samples, 0.09%) + -tick_sched_timer (2,568,736 samples, 0.02%) - +free_tail_page_prepare (55,358,225 samples, 0.34%) + -do_vmi_munmap (17,268,198 samples, 0.13%) - +do_vmi_munmap (259,486,254 samples, 1.59%) + -release_pages (13,814,219 samples, 0.10%) - +entry_SYSCALL_64_after_hwframe (38,464,959 samples, 0.24%) + -numa_node_size64 (9,332,869 samples, 0.07%) - +number (3,666,378 samples, 0.02%) + -_int_malloc (25,295,540 samples, 0.19%) - +change_protection (26,909,047 samples, 0.16%) + -main (5,288,746,947 samples, 39.24%) -main +sysvec_apic_timer_interrupt (7,666,902 samples, 0.05%) + -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (608,239,162 samples, 4.51%) -std::.. +update_process_times (6,579,035 samples, 0.04%) + -do_madvise (1,739,797 samples, 0.01%) - +__mod_lruvec_page_state (2,463,026 samples, 0.02%) + -exc_page_fault (6,888,173 samples, 0.05%) - +tick_sched_handle (6,579,035 samples, 0.04%) + -do_mprotect_pkey (6,744,844 samples, 0.05%) - +asm_sysvec_apic_timer_interrupt (8,685,354 samples, 0.05%) + -dsacache::Cache::Clear (5,177,974 samples, 0.04%) - +qi_submit_sync (16,071,349 samples, 0.10%) + -clear_huge_page (1,737,509 samples, 0.01%) - +free_unref_page (13,824,412 samples, 0.08%) + -dml::core::dispatcher::hw_device::initialize_new_device (4,958,905 samples, 0.04%) - +dml::core::dispatcher::hw_dispatcher::initialize_hw (12,186,558 samples, 0.07%) + -_raw_spin_lock (6,071,274 samples, 0.05%) - +lru_gen_add_folio (1,423,052 samples, 0.01%) + -__cond_resched (2,603,227 samples, 0.02%) - +do_user_addr_fault (3,409,299 samples, 0.02%) + -pte_alloc_one (4,339,374 samples, 0.03%) - +handle_mm_fault (2,542,611 samples, 0.02%) + -sysvec_apic_timer_interrupt (4,298,295 samples, 0.03%) - +task_work_run (5,918,508 samples, 0.04%) + -qi_submit_sync (12,655,234 samples, 0.09%) - +__count_memcg_events (2,820,695 samples, 0.02%) + -__hrtimer_run_queues (20,760,710 samples, 0.15%) - +sysfs_emit_at (13,571,657 samples, 0.08%) + -std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_node (5,177,974 samples, 0.04%) - +qi_flush_piotlb (44,113,272 samples, 0.27%) + -asm_sysvec_apic_timer_interrupt (20,760,710 samples, 0.15%) - +perf_event_task_tick (2,777,240 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (25,295,540 samples, 0.19%) - +free_unref_page (70,061,865 samples, 0.43%) + -__x64_sys_madvise (1,739,797 samples, 0.01%) - +intel_invalidate_range (19,560,452 samples, 0.12%) + -__GI_munmap (5,177,974 samples, 0.04%) - +exc_page_fault (6,013,560 samples, 0.04%) + -do_anonymous_page (1,737,026 samples, 0.01%) - +perf_event_mmap (6,351,246 samples, 0.04%) + -mprotect_fixup (6,744,844 samples, 0.05%) - +kernfs_dop_revalidate (1,551,814 samples, 0.01%) + -get_page_from_freelist (1,735,652 samples, 0.01%) - +std::_Vector_base<int, std::allocator<int> >::_M_allocate (3,431,863 samples, 0.02%) + -__hrtimer_run_queues (2,568,736 samples, 0.02%) - +asm_sysvec_apic_timer_interrupt (3,300,386 samples, 0.02%) + -tlb_batch_pages_flush (13,814,219 samples, 0.10%) - +qi_flush_dev_iotlb_pasid (32,864,156 samples, 0.20%) + -std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::clear (5,177,974 samples, 0.04%) - +entry_SYSCALL_64_after_hwframe (9,653,225 samples, 0.06%) + -entry_SYSCALL_64_after_hwframe (3,468,037 samples, 0.03%) - +charge_memcg (11,014,135 samples, 0.07%) + -dml::core::dispatcher::hw_dispatcher::initialize_hw (4,958,906 samples, 0.04%) - +handle_mm_fault (1,830,044,679 samples, 11.20%) +handle_mm_fault -aggr_j (2,119,810,645 samples, 15.73%) -aggr_j +std::pair<unsigned char* const, dsacache::CacheData>::~pair (262,080,827 samples, 1.60%) + -clear_page_erms (1,214,507,617 samples, 9.01%) -clear_page_erms +__sysvec_apic_timer_interrupt (7,666,902 samples, 0.05%) + -device_parse (4,519,291 samples, 0.03%) - +entry_SYSCALL_64_after_hwframe (6,690,044 samples, 0.04%) + -task_tick_fair (1,734,074 samples, 0.01%) - +std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> >::allocate (2,231,659 samples, 0.01%) + -accfg_wq_get_first (4,519,291 samples, 0.03%) - +sysvec_apic_timer_interrupt (1,541,693 samples, 0.01%) + -dsacache::Cache::Access (1,737,345 samples, 0.01%) - +unmap_vmas (91,685,764 samples, 0.56%) + -do_syscall_64 (25,295,540 samples, 0.19%) - +entry_SYSCALL_64_after_hwframe (2,565,568 samples, 0.02%) + -Aggregation<unsigned long, Sum, (2,116,333,329 samples, 15.70%) -Aggregation<unsigned lon.. +__mem_cgroup_uncharge (2,593,230 samples, 0.02%) + -__libc_start_call_main (5,290,426,542 samples, 39.25%) -__libc_start_call_main +handle_mm_fault (1,722,903 samples, 0.01%) + -qi_flush_piotlb (2,570,042 samples, 0.02%) - +asm_sysvec_apic_timer_interrupt (7,666,902 samples, 0.05%) + -vma_alloc_folio (1,475,070,703 samples, 10.94%) -vma_alloc_folio +__mem_cgroup_charge (7,762,650 samples, 0.05%) + -futex_wait (1,739,970 samples, 0.01%) - +__x64_sys_munmap (18,146,766 samples, 0.11%) + -void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > (5,177,974 samples, 0.04%) - +unmap_region (18,146,766 samples, 0.11%) + -[libstdc++.so.6.0.32] (7,573,014,148 samples, 56.19%) -[libstdc++.so.6.0.32] +syscall (7,728,229 samples, 0.05%) + -free_unref_page_prepare (12,087,054 samples, 0.09%) - +do_syscall_64 (20,070,864 samples, 0.12%) + -__mem_cgroup_charge (1,735,148 samples, 0.01%) - +release_pages (14,688,744 samples, 0.09%) + -void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::destroy<std::pair<unsigned char* const, dsacache::CacheData> > (5,177,974 samples, 0.04%) - +kernel_clone (23,342,525 samples, 0.14%) + -scheduler_tick (2,568,736 samples, 0.02%) - +down_read_trylock (3,277,199 samples, 0.02%) + -qi_submit_sync (2,570,042 samples, 0.02%) - +mas_store_gfp (1,728,362 samples, 0.01%) + -clock_gettime@plt (32,929,155 samples, 0.24%) - +qi_submit_sync (2,592,030 samples, 0.02%) + -do_syscall_64 (1,739,797 samples, 0.01%) - +__mod_lruvec_page_state (1,729,498 samples, 0.01%) + -__alloc_pages (1,475,070,703 samples, 10.94%) -__alloc_pages +asm_sysvec_apic_timer_interrupt (1,541,693 samples, 0.01%) + -sum_check (350,026,063 samples, 2.60%) -su.. +tick_sched_handle (2,777,240 samples, 0.02%) + -kernel_clone (3,468,037 samples, 0.03%) - +seq_release (2,404,641 samples, 0.01%) + -_IO_new_file_underflow (8,469,848 samples, 0.06%) - +scheduler_tick (1,460,612 samples, 0.01%) + -qi_submit_sync (10,915,702 samples, 0.08%) - +exit_to_user_mode_prepare (2,109,428 samples, 0.01%) + -entry_SYSCALL_64_after_hwframe (6,744,844 samples, 0.05%) - +dsacache::Cache::SubmitTask (2,831,153,568 samples, 17.33%) +dsacache::Cache::SubmitTask -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (433,839,037 samples, 3.22%) -std.. +grow_heap (39,224,955 samples, 0.24%) + -intel_invalidate_range (23,570,936 samples, 0.17%) - +__mem_cgroup_charge (3,461,902 samples, 0.02%) + -hrtimer_interrupt (6,036,217 samples, 0.04%) - +do_vmi_align_munmap (1,728,623 samples, 0.01%) + -__x64_sys_mprotect (6,744,844 samples, 0.05%) - +malloc_consolidate (1,469,475 samples, 0.01%) + -__sysfs_device_parse (4,519,291 samples, 0.03%) - +_mm512_stream_load_si512 (384,094,614 samples, 2.35%) +_.. -scheduler_tick (20,760,710 samples, 0.15%) - +std::vector<int, std::allocator<int> >::vector (3,431,863 samples, 0.02%) + -qi_flush_dev_iotlb_pasid (12,655,234 samples, 0.09%) - +tlb_batch_pages_flush (14,688,744 samples, 0.09%) + -__alloc_pages (4,339,374 samples, 0.03%) - +do_mprotect_pkey (20,421,763 samples, 0.12%) + -do_mprotect_pkey (25,295,540 samples, 0.19%) - +entry_SYSCALL_64_after_hwframe (50,596,001 samples, 0.31%) + -dml::detail::ml::impl::hardware::submit (5,803,116 samples, 0.04%) - +asm_exc_page_fault (1,833,508,599 samples, 11.22%) +asm_exc_page_fault -__GI___clock_gettime (1,628,054,632 samples, 12.08%) -__GI___clock_gettime +Filter<unsigned long, LT, (1,678,013,429 samples, 10.27%) +Filter<unsigned.. -hrtimer_interrupt (2,568,736 samples, 0.02%) - +qi_submit_sync (8,586,262 samples, 0.05%) + -qi_flush_piotlb (1,679,595 samples, 0.01%) - +clear_page_erms (1,694,345 samples, 0.01%) + -perf_adjust_freq_unthr_context (1,704,757 samples, 0.01%) - +clear_page_erms (3,430,412 samples, 0.02%) + -std::barrier<NopStruct>::wait (1,739,970 samples, 0.01%) - +accfg_wq_get_first (8,186,564 samples, 0.05%) + -clear_page_erms (307,946,959 samples, 2.28%) -c.. +__libc_openat64 (2,565,568 samples, 0.02%) + -__rmqueue_pcplist (1,735,652 samples, 0.01%) - +__page_cache_release (1,728,698 samples, 0.01%) + -__mmu_notifier_invalidate_range_end (1,679,595 samples, 0.01%) - +page_remove_rmap (3,459,855 samples, 0.02%) + -inherit_event.isra.0 (2,599,737 samples, 0.02%) - +scheduler_tick (7,117,074 samples, 0.04%) + -do_syscall_64 (1,679,595 samples, 0.01%) - +vma_complete (1,732,102 samples, 0.01%) + diff --git a/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul16-wl4294967296-cs2097152.csv b/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul16-wl4294967296-cs2097152.csv new file mode 100644 index 0000000..8d39a70 --- /dev/null +++ b/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul16-wl4294967296-cs2097152.csv @@ -0,0 +1,6 @@ +run;rt-ns;rt-s;result[0];scana-run;scana-wait;scanb-run;scanb-wait;aggrj-run;aggrj-wait;cache-hr; +0;129548241;0.129548;13289362112;53177682;22141;0;0;56668250;62924170;0; +1;131066794;0.131067;13289362112;55050670;26671;0;0;60095789;57973060;0; +2;129953382;0.129953;13289362112;54855583;55095;0;0;56085882;57622391;0; +3;130793552;0.130794;13289362112;54111835;361393;0;0;57056650;59773214;0; +4;131000929;0.131001;13289362112;53896158;81691;0;0;58550850;58654956;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv b/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv deleted file mode 100644 index 6f9aff0..0000000 --- a/qdp_project/evaluation-results/qdp-xeonmax-dram-tca2-tcb0-tcj1-tmul8-wl4294967296-cs2097152.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;1384070228;1.38407;13289362112;18446744073708121004;910;1437012;0;0;0;441613;1572806;0; -1;1253705586;1.25371;13289362112;18446744073708444589;751;1113377;0;0;0;323023;1297474;0; -2;1424691737;1.42469;13289362112;18446744073708138019;1328;1419632;0;0;0;400037;1557709;0; -3;1329719999;1.32972;13289362112;18446744073708274218;312;1284204;0;0;0;392582;1317678;0; -4;1250425103;1.25043;13289362112;18446744073708047114;201;1511398;0;0;0;416545;1451243;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv deleted file mode 100644 index b71893b..0000000 --- a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;2536072344;2.53607;13289362112;18446744073697239175;995173;13157651;2188748;112;0;4544721;18447110;0; -1;2165383914;2.16538;13289362112;18446744073686547899;1562266;23639880;2040783;129;0;4083149;28451690;0; -2;2393350274;2.39335;13289362112;18446744073691218761;914891;19038237;2050101;95;0;3989943;23093564;0; -3;2542202609;2.5422;13289362112;18446744073697519601;1370138;12615641;3695908;83;0;4736486;16720786;0; -4;2616699345;2.6167;13289362112;18446744073695083480;1461789;15194072;2172819;141;0;3963663;18705128;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv new file mode 100644 index 0000000..fab451b --- /dev/null +++ b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-dist-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv @@ -0,0 +1,6 @@ +run;rt-ns;rt-s;result[0];scana-run;scana-wait;scanb-run;scanb-wait;aggrj-run;aggrj-wait;cache-hr; +0;306933779;0.306934;13289362112;61659372;1246;223881968;7072;67513214;223979989;1; +1;352235825;0.352236;13289362112;61314450;1927;269539617;10039;72487462;269660252;1; +2;349261970;0.349262;13289362112;61858709;1189;247293635;7002;74874730;247369264;1; +3;314103127;0.314103;13289362112;60018882;1226;223568363;8213;72411399;223551183;1; +4;304913879;0.304914;13289362112;61635217;1621;217438169;9473;72352839;217540746;1; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv deleted file mode 100644 index a180d11..0000000 --- a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs16777216.csv +++ /dev/null @@ -1,6 +0,0 @@ -run;rt-ns;rt-s;result[0];scana-run;scana-wait;scana-load;scanb-run;scanb-wait;aggrj-run;aggrj-wait;aggrj-load;cache-hr; -0;2470863547;2.47086;13289362112;18446744073690031357;1196819;20069202;1963658;107;0;3377995;22622836;0; -1;2528668383;2.52867;13289362112;18446744073690542201;1270587;19964463;1958977;95;0;3524605;22578890;0; -2;1995968797;1.99597;13289362112;18446744073689806113;582965;20816720;1956052;93;0;4951653;23045303;0; -3;2015452757;2.01545;13289362112;18446744073684432123;1643622;25889421;1947255;100;0;6225001;19511459;0; -4;2280860370;2.28086;13289362112;18446744073691893576;1347346;18264335;4137877;117;0;4813737;21454208;0; diff --git a/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv new file mode 100644 index 0000000..a1ce202 --- /dev/null +++ b/qdp_project/evaluation-results/qdp-xeonmax-prefetch-tca2-tcb1-tcj1-tmul8-wl4294967296-cs8388608.csv @@ -0,0 +1 @@ +run;rt-ns;rt-s;result[0];scana-run;scana-wait;scanb-run;scanb-wait;aggrj-run;aggrj-wait;cache-hr;