diff --git a/qdp_project/evaluation-results/perf.new.svg b/qdp_project/evaluation-results/perf.new.svg new file mode 100644 index 0000000..a877d29 --- /dev/null +++ b/qdp_project/evaluation-results/perf.new.svg @@ -0,0 +1,2561 @@ + + + + + + + + + + + + + + +Flame Graph + +Reset Zoom +Search +ic + + + +qi_submit_sync (5,274,943 samples, 0.05%) + + + +std::thread::_M_start_thread (2,576,245 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (4,816,807 samples, 0.04%) + + + +do_syscall_64 (7,832,406 samples, 0.07%) + + + +advise_stack_range (3,340,721 samples, 0.03%) + + + +task_work_run (3,977,278 samples, 0.04%) + + + +perf_iterate_sb.constprop.0 (1,544,977 samples, 0.01%) + + + +asm_sysvec_apic_timer_interrupt (2,180,552 samples, 0.02%) + + + +perf_adjust_freq_unthr_context (8,995,452 samples, 0.08%) + + + +down_write (2,575,917 samples, 0.02%) + + + +__libc_start_call_main (6,715,787,788 samples, 61.85%) +__libc_start_call_main + + +lru_gen_del_folio.constprop.0 (1,727,547 samples, 0.02%) + + + +__mod_lruvec_page_state (3,747,166 samples, 0.03%) + + + +do_syscall_64 (1,418,159 samples, 0.01%) + + + +exit_to_user_mode_prepare (1,005,518 samples, 0.01%) + + + +__libc_openat64 (2,539,730 samples, 0.02%) + + + +charge_memcg (1,731,219 samples, 0.02%) + + + +qi_submit_sync (14,767,685 samples, 0.14%) + + + +__mem_cgroup_charge (1,731,219 samples, 0.02%) + + + +syscall (3,456,757 samples, 0.03%) + + + +entry_SYSCALL_64_after_hwframe (29,370,212 samples, 0.27%) + + + +perf_iterate_sb.constprop.0 (11,662,927 samples, 0.11%) + + + +__mem_cgroup_uncharge (1,727,729 samples, 0.02%) + + + +do_anonymous_page (1,731,219 samples, 0.02%) + + + +dev_attr_show (6,597,170 samples, 0.06%) + + + +vm_area_alloc (2,149,175 samples, 0.02%) + + + +folio_batch_move_lru (1,728,947 samples, 0.02%) + + + +dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (44,280,626 samples, 0.41%) + + + +release_pages (12,951,918 samples, 0.12%) + + + +exit_mmap (1,418,159 samples, 0.01%) + + + +numa_alloc_onnode (27,306,291 samples, 0.25%) + + + +mod_memcg_state (2,519,582 samples, 0.02%) + + + +vfs_fstatat (1,412,403 samples, 0.01%) + + + +kernfs_fop_release (2,475,815 samples, 0.02%) + + + +__mod_memcg_state (2,519,582 samples, 0.02%) + + + +do_user_addr_fault (7,222,744 samples, 0.07%) + + + +sum_check (1,624,148,506 samples, 14.96%) +sum_check + + +qi_flush_dev_iotlb_pasid (6,025,194 samples, 0.06%) + + + +Sum<unsigned long>::simd_agg (407,977,039 samples, 3.76%) +Sum<.. + + +_start (6,715,901,033 samples, 61.85%) +_start + + +vma_alloc_folio (867,846,598 samples, 7.99%) +vma_alloc_f.. + + +syscall_exit_to_user_mode (1,005,518 samples, 0.01%) + + + +_IO_new_file_close_it (4,816,807 samples, 0.04%) + + + +do_huge_pmd_anonymous_page (1,850,694,882 samples, 17.05%) +do_huge_pmd_anonymous_page + + +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (30,229,702 samples, 0.28%) + + + +dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (30,983,963 samples, 0.29%) + + + +__fput (2,475,815 samples, 0.02%) + + + +unmap_vmas (2,592,830 samples, 0.02%) + + + +do_dentry_open (1,013,775 samples, 0.01%) + + + +asm_exc_page_fault (1,717,471 samples, 0.02%) + + + +perf_iterate_ctx (11,662,927 samples, 0.11%) + + + +free_unref_page (9,497,058 samples, 0.09%) + + + +mod_lruvec_page_state.constprop.0 (1,728,266 samples, 0.02%) + + + +__list_del_entry_valid (2,986,041 samples, 0.03%) + + + +exc_page_fault (1,717,471 samples, 0.02%) + + + +sysvec_apic_timer_interrupt (2,180,552 samples, 0.02%) + + + +__libc_open64 (1,013,775 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (1,412,403 samples, 0.01%) + + + +do_mprotect_pkey (12,999,915 samples, 0.12%) + + + +free_unref_page_prepare (22,481,129 samples, 0.21%) + + + +chrdev_open (1,013,775 samples, 0.01%) + + + +asm_exc_page_fault (1,852,943,584 samples, 17.07%) +asm_exc_page_fault + + +do_syscall_64 (124,499,915 samples, 1.15%) + + + +exc_page_fault (1,316,699,581 samples, 12.13%) +exc_page_fault + + +memcg_check_events (1,812,415 samples, 0.02%) + + + +intel_invalidate_range (37,178,741 samples, 0.34%) + + + +do_syscall_64 (3,456,757 samples, 0.03%) + + + +qi_flush_piotlb (14,767,685 samples, 0.14%) + + + +dml::detail::ml::task<std::allocator<unsigned char> >::task (30,229,702 samples, 0.28%) + + + +check_preemption_disabled (1,474,772 samples, 0.01%) + + + +asm_sysvec_apic_timer_interrupt (2,241,589 samples, 0.02%) + + + +ksys_read (10,850,783 samples, 0.10%) + + + +folio_add_lru (1,728,947 samples, 0.02%) + + + +accfg_get_param_long (5,596,223 samples, 0.05%) + + + +__mod_lruvec_state (1,728,947 samples, 0.02%) + + + +[libstdc++.so.6.0.32] (3,527,419,633 samples, 32.49%) +[libstdc++.so.6.0.32] + + +_mm512_stream_load_si512 (174,354,821 samples, 1.61%) + + + +do_user_addr_fault (1,316,699,581 samples, 12.13%) +do_user_addr_fault + + +__rmqueue_pcplist (1,232,673 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (3,382,035 samples, 0.03%) + + + +__GI___getdelim (30,878,762 samples, 0.28%) + + + +tick_sched_timer (1,365,948 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (3,340,721 samples, 0.03%) + + + +__x64_sys_futex (1,709,662 samples, 0.02%) + + + +__folio_alloc (2,215,907 samples, 0.02%) + + + +std::thread& std::vector<std::thread, std::allocator<std::thread> >::emplace_back<void (2,576,245 samples, 0.02%) + + + +link_path_walk.part.0.constprop.0 (4,734,613 samples, 0.04%) + + + +vma_merge (2,577,034 samples, 0.02%) + + + +qi_flush_piotlb (5,274,943 samples, 0.05%) + + + +do_syscall_64 (1,665,609 samples, 0.02%) + + + +down_read_trylock (1,616,803 samples, 0.01%) + + + +std::thread::thread<void (2,576,245 samples, 0.02%) + + + +_raw_spin_lock (1,729,481 samples, 0.02%) + + + +clear_page_erms (3,102,402 samples, 0.03%) + + + +__hrtimer_run_queues (1,300,420 samples, 0.01%) + + + +node_read_meminfo (6,597,170 samples, 0.06%) + + + +kmem_cache_alloc_node (2,596,627 samples, 0.02%) + + + +device_add (1,013,775 samples, 0.01%) + + + +numa_node_size64 (49,206,680 samples, 0.45%) + + + +mmap_region (20,376,546 samples, 0.19%) + + + +__GI___libc_malloc (18,102,965 samples, 0.17%) + + + +syscall_exit_to_user_mode (4,089,240 samples, 0.04%) + + + +__hrtimer_run_queues (1,365,948 samples, 0.01%) + + + +clear_page_erms (1,701,012 samples, 0.02%) + + + +inherit_event.isra.0 (6,922,986 samples, 0.06%) + + + +do_user_addr_fault (1,717,471 samples, 0.02%) + + + +__alloc_pages (1,508,595,053 samples, 13.89%) +__alloc_pages + + +__vm_munmap (16,406,617 samples, 0.15%) + + + +dsacache::CacheData::Deallocate (124,499,915 samples, 1.15%) + + + +__mod_node_page_state (1,728,266 samples, 0.02%) + + + +get_page_from_freelist (2,898,882 samples, 0.03%) + + + +__mmu_notifier_invalidate_range_end (10,596,312 samples, 0.10%) + + + +memcg_slab_post_alloc_hook (2,149,175 samples, 0.02%) + + + +page_remove_rmap (1,729,439 samples, 0.02%) + + + +__pthread_clockjoin_ex (1,709,662 samples, 0.02%) + + + +path_openat (1,014,603 samples, 0.01%) + + + +do_syscall_64 (1,412,403 samples, 0.01%) + + + +kernel_clone (6,922,986 samples, 0.06%) + + + +__kmem_cache_alloc_node (1,664,645 samples, 0.02%) + + + +QDPBench (10,857,675,450 samples, 100.00%) +QDPBench + + +sysfs_emit_at (4,018,619 samples, 0.04%) + + + +__x64_sys_munmap (16,406,617 samples, 0.15%) + + + +mbind_range (2,522,606 samples, 0.02%) + + + +unmap_page_range (2,592,830 samples, 0.02%) + + + +dsacache::CacheData::~CacheData (124,499,915 samples, 1.15%) + + + +scan_b (1,479,617,976 samples, 13.63%) +scan_b + + +__folio_alloc (2,898,882 samples, 0.03%) + + + +do_sys_openat2 (7,832,406 samples, 0.07%) + + + +qi_submit_sync (25,935,644 samples, 0.24%) + + + +_raw_spin_lock (1,728,713 samples, 0.02%) + + + +__alloc_pages (867,087,699 samples, 7.99%) +__alloc_pages + + +hrtimer_interrupt (8,995,452 samples, 0.08%) + + + +__GI_mprotect (29,370,212 samples, 0.27%) + + + +sysvec_apic_timer_interrupt (2,241,589 samples, 0.02%) + + + +__GI__IO_file_doallocate (19,515,368 samples, 0.18%) + + + +inode_permission (2,233,658 samples, 0.02%) + + + +copy_process (6,922,986 samples, 0.06%) + + + +vscnprintf (4,018,619 samples, 0.04%) + + + +__GI___close (1,005,518 samples, 0.01%) + + + +__GI___libc_read (11,363,394 samples, 0.10%) + + + +__GI_madvise (3,340,721 samples, 0.03%) + + + +__libc_open64 (7,832,406 samples, 0.07%) + + + +handle_mm_fault (7,222,744 samples, 0.07%) + + + +__kmalloc_node (1,664,645 samples, 0.02%) + + + +__vm_munmap (124,499,915 samples, 1.15%) + + + +sysmalloc (29,370,212 samples, 0.27%) + + + +__rmqueue_pcplist (6,972,673 samples, 0.06%) + + + +unmap_region (121,905,385 samples, 1.12%) + + + +grow_heap (29,370,212 samples, 0.27%) + + + +allocate_stack (2,576,245 samples, 0.02%) + + + +__GI_munmap (124,499,915 samples, 1.15%) + + + +qi_submit_sync (2,478,462 samples, 0.02%) + + + +__futex_abstimed_wait_common (1,709,662 samples, 0.02%) + + + +exc_page_fault (7,222,744 samples, 0.07%) + + + +_mm512_mask_add_epi64 (407,977,039 samples, 3.76%) +_mm5.. + + +do_mmap (21,476,337 samples, 0.20%) + + + +dsacache::Cache::Access (1,478,760,036 samples, 13.62%) +dsacache::Cache::Acc.. + + +__mem_cgroup_charge (8,853,813 samples, 0.08%) + + + +do_syscall_64 (1,013,775 samples, 0.01%) + + + +do_vmi_align_munmap (16,406,617 samples, 0.15%) + + + +accfg_device_get_first (1,369,844 samples, 0.01%) + + + +perf_event_task_tick (1,365,948 samples, 0.01%) + + + +__GI__IO_file_open (7,832,406 samples, 0.07%) + + + +internal_get_user_pages_fast (2,235,330 samples, 0.02%) + + + +dsacache::Cache::Clear (124,499,915 samples, 1.15%) + + + +lru_add_fn (3,945,551 samples, 0.04%) + + + +do_syscall_64 (957,900 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (1,014,603 samples, 0.01%) + + + +inherit_task_group.isra.0 (6,922,986 samples, 0.06%) + + + +release_pages (29,397,390 samples, 0.27%) + + + +void std::destroy_at<std::pair<unsigned char* const, dsacache::CacheData> > (124,499,915 samples, 1.15%) + + + +hrtimer_interrupt (1,365,948 samples, 0.01%) + + + +do_syscall_64 (23,194,745 samples, 0.21%) + + + +vma_merge (2,522,606 samples, 0.02%) + + + +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (1,707,102,197 samples, 15.72%) +unsigned int std::unifor.. + + +do_vmi_align_munmap (124,499,915 samples, 1.15%) + + + +kernfs_iop_permission (1,719,405 samples, 0.02%) + + + +unmap_page_range (5,187,701 samples, 0.05%) + + + +devices_init (1,369,844 samples, 0.01%) + + + +tick_sched_handle (1,365,948 samples, 0.01%) + + + +unsigned long std::uniform_int_distribution<unsigned long>::operator (2,651,695,239 samples, 24.42%) +unsigned long std::uniform_int_distrib.. + + +dsacache::Cache::SubmitTask (1,474,554,916 samples, 13.58%) +dsacache::Cache::Sub.. + + +__mmu_notifier_invalidate_range (37,178,741 samples, 0.34%) + + + +do_syscall_64 (4,816,807 samples, 0.04%) + + + +number (1,718,765 samples, 0.02%) + + + +__GI___close_nocancel (4,816,807 samples, 0.04%) + + + +__x64_sys_openat (7,832,406 samples, 0.07%) + + + +__alloc_pages (2,215,907 samples, 0.02%) + + + +do_filp_open (7,021,877 samples, 0.06%) + + + +aggr_j (1,283,482,019 samples, 11.82%) +aggr_j + + +format_decode (1,657,716 samples, 0.02%) + + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (990,880,606 samples, 9.13%) +std::mersenne.. + + +update_process_times (1,300,420 samples, 0.01%) + + + +hrtimer_interrupt (2,020,732 samples, 0.02%) + + + +futex_wait (1,709,662 samples, 0.02%) + + + +uncharge_batch (1,727,729 samples, 0.02%) + + + +dml::core::dispatcher::hw_device::initialize_new_device (10,649,925 samples, 0.10%) + + + +intel_invalidate_range (49,276,701 samples, 0.45%) + + + +exit_to_user_mode_prepare (1,526,259 samples, 0.01%) + + + +std::thread::join (1,709,662 samples, 0.02%) + + + +lru_gen_add_folio (3,945,551 samples, 0.04%) + + + +dml::core::dispatcher::hw_dispatcher::initialize_hw (12,436,625 samples, 0.11%) + + + +try_grab_folio (1,464,761 samples, 0.01%) + + + +check_preemption_disabled (974,903 samples, 0.01%) + + + +accfg_get_param_str (2,020,121 samples, 0.02%) + + + +main (6,713,260,491 samples, 61.83%) +main + + +change_protection (21,306,971 samples, 0.20%) + + + +qi_submit_sync (18,156,520 samples, 0.17%) + + + +preempt_count_add (1,729,481 samples, 0.02%) + + + +tlb_batch_pages_flush (13,813,787 samples, 0.13%) + + + +numa_node_of_cpu (1,719,530 samples, 0.02%) + + + +__hrtimer_run_queues (8,995,452 samples, 0.08%) + + + +free_tail_page_prepare (5,179,307 samples, 0.05%) + + + +__fput (1,005,518 samples, 0.01%) + + + +__x64_sys_madvise (3,340,721 samples, 0.03%) + + + +lock_vma_under_rcu (2,662,357 samples, 0.02%) + + + +std::_Hashtable<unsigned char*, std::pair<unsigned char* const, dsacache::CacheData>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> >, std::__detail::_Select1st, std::equal_to<unsigned char*>, std::hash<unsigned char*>, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits<false, false, true> >::clear (124,499,915 samples, 1.15%) + + + +path_openat (2,028,052 samples, 0.02%) + + + +dsacache::Cache::ExecuteCopy (44,280,626 samples, 0.41%) + + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (596,422,567 samples, 5.49%) +std::me.. + + +do_syscall_64 (10,850,783 samples, 0.10%) + + + +sysfs_kf_seq_show (6,597,170 samples, 0.06%) + + + +entry_SYSCALL_64_after_hwframe (1,665,609 samples, 0.02%) + + + +__pthread_create_2_1 (2,576,245 samples, 0.02%) + + + +asm_exc_page_fault (7,813,295 samples, 0.07%) + + + +accfg_get_param_long (1,927,699 samples, 0.02%) + + + +get_page_from_freelist (1,232,673 samples, 0.01%) + + + +change_protection (10,596,312 samples, 0.10%) + + + +__fput (1,014,019 samples, 0.01%) + + + +mprotect_fixup (12,141,289 samples, 0.11%) + + + +qi_submit_sync (23,341,057 samples, 0.21%) + + + +free_tail_page_prepare (17,289,659 samples, 0.16%) + + + +vfs_read (10,850,783 samples, 0.10%) + + + +std::pair<unsigned char* const, dsacache::CacheData>::~pair (124,499,915 samples, 1.15%) + + + +mem_cgroup_charge_statistics (1,608,803 samples, 0.01%) + + + +do_vmi_munmap (124,499,915 samples, 1.15%) + + + +__count_memcg_events (1,834,351 samples, 0.02%) + + + +all (10,857,675,452 samples, 100%) + + + +scan_a (764,319,638 samples, 7.04%) +scan_a + + +entry_SYSCALL_64_after_hwframe (3,456,757 samples, 0.03%) + + + +update_process_times (8,995,452 samples, 0.08%) + + + +__sysvec_apic_timer_interrupt (2,020,732 samples, 0.02%) + + + +__mmu_notifier_invalidate_range_end (49,276,701 samples, 0.45%) + + + +__mod_lruvec_state (1,246,575 samples, 0.01%) + + + +clear_page_erms (312,006,524 samples, 2.87%) +cl.. + + +clear_page_erms (683,869,034 samples, 6.30%) +clear_pa.. + + +entry_SYSCALL_64_after_hwframe (12,999,915 samples, 0.12%) + + + +clear_page_erms (370,773,520 samples, 3.41%) +cle.. + + +entry_SYSCALL_64_after_hwframe (1,013,775 samples, 0.01%) + + + +_raw_spin_lock_irqsave (1,728,447 samples, 0.02%) + + + +__cond_resched (5,709,094 samples, 0.05%) + + + +_IO_new_file_underflow (30,878,762 samples, 0.28%) + + + +do_user_addr_fault (1,852,943,584 samples, 17.07%) +do_user_addr_fault + + +[unknown] (4,194,714 samples, 0.04%) + + + +vma_alloc_folio (1,509,460,438 samples, 13.90%) +vma_alloc_folio + + +task_work_run (1,005,518 samples, 0.01%) + + + +vsnprintf (4,018,619 samples, 0.04%) + + + +do_filp_open (2,028,052 samples, 0.02%) + + + +task_mm_cid_work (1,726,993 samples, 0.02%) + + + +__sysvec_apic_timer_interrupt (1,365,948 samples, 0.01%) + + + +do_syscall_64 (29,370,212 samples, 0.27%) + + + +exit_to_user_mode_prepare (1,726,993 samples, 0.02%) + + + +do_filp_open (1,014,603 samples, 0.01%) + + + +numa_node_of_cpu (1,530,542 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (10,850,783 samples, 0.10%) + + + +__x64_sys_openat (1,014,603 samples, 0.01%) + + + +tick_sched_handle (8,995,452 samples, 0.08%) + + + +qi_flush_dev_iotlb_pasid (23,341,057 samples, 0.21%) + + + +free_unref_page (22,481,129 samples, 0.21%) + + + +__cond_resched (6,573,644 samples, 0.06%) + + + +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (30,229,702 samples, 0.28%) + + + +call_init (1,196,824 samples, 0.01%) + + + +__mod_lruvec_page_state (1,728,266 samples, 0.02%) + + + +charge_memcg (4,949,036 samples, 0.05%) + + + +wqs_init (8,620,835 samples, 0.08%) + + + +__rmqueue_pcplist (26,751,206 samples, 0.25%) + + + +zap_huge_pmd (3,457,292 samples, 0.03%) + + + +scheduler_tick (1,365,948 samples, 0.01%) + + + +folio_add_lru (2,701,975 samples, 0.02%) + + + +__handle_mm_fault (7,222,744 samples, 0.07%) + + + +perf_iterate_ctx (5,486,207 samples, 0.05%) + + + +perf_event_mmap (1,544,977 samples, 0.01%) + + + +sync_regs (4,312,756 samples, 0.04%) + + + +void fill_mt<unsigned long> (4,943,244,832 samples, 45.53%) +void fill_mt<unsigned long> + + +path_openat (7,021,877 samples, 0.06%) + + + +__GI__IO_doallocbuf (19,515,368 samples, 0.18%) + + + +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (596,422,567 samples, 5.49%) +unsigne.. + + +entry_SYSCALL_64_after_hwframe (2,038,073 samples, 0.02%) + + + +free_compound_page (1,727,729 samples, 0.02%) + + + +sysvec_apic_timer_interrupt (8,995,452 samples, 0.08%) + + + +security_vm_enough_memory_mm (1,591,769 samples, 0.01%) + + + +__GI_exit (2,527,297 samples, 0.02%) + + + +do_mprotect_pkey (29,370,212 samples, 0.27%) + + + +_IO_new_fclose (4,816,807 samples, 0.04%) + + + +__GI___mmap64 (23,194,745 samples, 0.21%) + + + +handle_mm_fault (1,852,943,584 samples, 17.07%) +handle_mm_fault + + +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (426,886,794 samples, 3.93%) +std:.. + + +do_sys_openat2 (1,014,603 samples, 0.01%) + + + +__GI___fstatat64 (1,412,403 samples, 0.01%) + + + +idxd_cdev_open (1,013,775 samples, 0.01%) + + + +vm_mmap_pgoff (23,194,745 samples, 0.21%) + + + +mod_lruvec_page_state.constprop.0 (3,747,166 samples, 0.03%) + + + +__alloc_pages (2,898,882 samples, 0.03%) + + + +groups_init (2,409,723 samples, 0.02%) + + + +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_node (124,499,915 samples, 1.15%) + + + +_IO_new_file_fopen (8,432,767 samples, 0.08%) + + + +__mod_lruvec_page_state (1,710,376 samples, 0.02%) + + + +vma_alloc_folio (2,215,907 samples, 0.02%) + + + +clear_huge_page (396,622,667 samples, 3.65%) +clea.. + + +__GI_munmap (1,665,609 samples, 0.02%) + + + +_mid_memalign (30,229,702 samples, 0.28%) + + + +vfs_read (957,900 samples, 0.01%) + + + +__list_del_entry_valid (1,232,673 samples, 0.01%) + + + +read (1,018,420 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (1,005,518 samples, 0.01%) + + + +mas_store_prealloc (1,537,267 samples, 0.01%) + + + +void std::allocator_traits<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::destroy<std::pair<unsigned char* const, dsacache::CacheData> > (124,499,915 samples, 1.15%) + + + +__x64_sys_mprotect (29,370,212 samples, 0.27%) + + + +do_syscall_64 (1,005,518 samples, 0.01%) + + + +handle_mm_fault (1,312,938,533 samples, 12.09%) +handle_mm_fault + + +do_sys_openat2 (1,013,775 samples, 0.01%) + + + +kmem_cache_alloc (2,149,175 samples, 0.02%) + + + +unmap_vmas (54,464,402 samples, 0.50%) + + + +dsacache::Cache::GetCacheNode (2,577,577 samples, 0.02%) + + + +dsacache::Cache::Access (4,987,299 samples, 0.05%) + + + +__mod_lruvec_state (1,728,266 samples, 0.02%) + + + +vfs_statx (1,412,403 samples, 0.01%) + + + +update_process_times (1,365,948 samples, 0.01%) + + + +folio_batch_move_lru (2,701,975 samples, 0.02%) + + + +do_syscall_64 (3,382,035 samples, 0.03%) + + + +pte_alloc_one (10,782,226 samples, 0.10%) + + + +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (30,229,702 samples, 0.28%) + + + +entry_SYSCALL_64_after_hwframe (957,900 samples, 0.01%) + + + +tlb_batch_pages_flush (29,397,390 samples, 0.27%) + + + +__next_zones_zonelist (1,695,609 samples, 0.02%) + + + +free_unref_page_prepare (9,497,058 samples, 0.09%) + + + +__run_exit_handlers (2,527,297 samples, 0.02%) + + + +__x64_sys_openat (1,013,775 samples, 0.01%) + + + +do_mbind (3,382,035 samples, 0.03%) + + + +kernel_mbind (3,382,035 samples, 0.03%) + + + +get_page_from_freelist (2,215,907 samples, 0.02%) + + + +_IO_new_file_init_internal (1,489,012 samples, 0.01%) + + + +__folio_alloc (1,508,595,053 samples, 13.89%) +__folio_alloc + + +mbind (3,382,035 samples, 0.03%) + + + +qi_submit_sync (19,022,221 samples, 0.18%) + + + +auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (44,280,626 samples, 0.41%) + + + +qi_flush_piotlb (25,935,644 samples, 0.24%) + + + +device_parse (8,620,835 samples, 0.08%) + + + +vma_alloc_folio (3,551,167 samples, 0.03%) + + + +dsacache::Cache::GetCacheNode (4,987,299 samples, 0.05%) + + + +decltype (2,576,245 samples, 0.02%) + + + +exit_to_user_mode_prepare (4,089,240 samples, 0.04%) + + + +qi_flush_dev_iotlb_pasid (5,321,369 samples, 0.05%) + + + +clear_huge_page (333,454,013 samples, 3.07%) +cle.. + + +__count_memcg_events (1,102,429 samples, 0.01%) + + + +__sysvec_apic_timer_interrupt (8,995,452 samples, 0.08%) + + + +__alloc_pages (1,729,251 samples, 0.02%) + + + +do_syscall_64 (16,406,617 samples, 0.15%) + + + +do_sys_openat2 (2,028,052 samples, 0.02%) + + + +add_wq (8,112,387 samples, 0.07%) + + + +Filter<unsigned long, LT, (764,319,624 samples, 7.04%) +Filter<un.. + + +lru_gen_del_folio.constprop.0 (1,729,406 samples, 0.02%) + + + +__x64_sys_munmap (124,499,915 samples, 1.15%) + + + +perf_event_task_tick (8,995,452 samples, 0.08%) + + + +unsigned long std::uniform_int_distribution<unsigned long>::operator (596,422,567 samples, 5.49%) +unsigne.. + + +__x64_sys_exit_group (1,418,159 samples, 0.01%) + + + +folio_batch_move_lru (3,945,551 samples, 0.04%) + + + +dsacache::Cache::SubmitTask (13,296,663 samples, 0.12%) + + + +tick_sched_timer (1,300,420 samples, 0.01%) + + + +std::__detail::_Hashtable_alloc<std::allocator<std::__detail::_Hash_node<std::pair<unsigned char* const, dsacache::CacheData>, false> > >::_M_deallocate_nodes (124,499,915 samples, 1.15%) + + + +kernfs_seq_start (1,981,537 samples, 0.02%) + + + +memcg_account_kmem (2,519,582 samples, 0.02%) + + + +folio_add_new_anon_rmap (1,710,376 samples, 0.02%) + + + +_int_malloc (17,243,479 samples, 0.16%) + + + +__mmu_notifier_invalidate_range_end (21,306,971 samples, 0.20%) + + + +__GI___libc_read (957,900 samples, 0.01%) + + + +dml::core::dispatcher::hw_queue::initialize_new_queue (2,029,090 samples, 0.02%) + + + +start_thread (3,530,760,354 samples, 32.52%) +start_thread + + +__this_cpu_preempt_check (1,035,514 samples, 0.01%) + + + +perf_event_alloc (6,057,194 samples, 0.06%) + + + +unsigned long std::uniform_int_distribution<unsigned long>::operator (2,659,479,023 samples, 24.49%) +unsigned long std::uniform_int_distrib.. + + +__do_sys_newfstatat (1,412,403 samples, 0.01%) + + + +read (957,900 samples, 0.01%) + + + +do_vmi_munmap (16,406,617 samples, 0.15%) + + + +Vector_Loader<unsigned long, (174,354,821 samples, 1.61%) + + + +__GI__IO_link_in (1,489,012 samples, 0.01%) + + + +call_init (1,196,824 samples, 0.01%) + + + +vm_unmapped_area (1,099,791 samples, 0.01%) + + + +devices_init (2,865,392 samples, 0.03%) + + + +do_syscall_64 (2,539,730 samples, 0.02%) + + + +asm_sysvec_apic_timer_interrupt (1,285,604 samples, 0.01%) + + + +intel_invalidate_range (10,596,312 samples, 0.10%) + + + +syscall_exit_to_user_mode (1,526,259 samples, 0.01%) + + + +tlb_finish_mmu (67,440,983 samples, 0.62%) + + + +do_syscall_64 (3,340,721 samples, 0.03%) + + + +_dl_init (1,196,824 samples, 0.01%) + + + +mas_wr_store_entry.isra.0 (1,730,304 samples, 0.02%) + + + +qi_flush_piotlb (19,022,221 samples, 0.18%) + + + +openat (1,014,603 samples, 0.01%) + + + +__mmu_notifier_invalidate_range_end (2,478,462 samples, 0.02%) + + + +kernel_get_mempolicy (3,456,757 samples, 0.03%) + + + +do_huge_pmd_anonymous_page (6,842,888 samples, 0.06%) + + + +_raw_spin_lock (2,066,301 samples, 0.02%) + + + +scheduler_tick (1,256,830 samples, 0.01%) + + + +qi_submit_sync (5,321,369 samples, 0.05%) + + + +get_mem_cgroup_from_mm (3,904,777 samples, 0.04%) + + + +get_page_from_freelist (864,889,410 samples, 7.97%) +get_page_fr.. + + +intel_invalidate_range (21,306,971 samples, 0.20%) + + + +do_syscall_64 (2,038,073 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (124,499,915 samples, 1.15%) + + + +__alloc_pages (7,035,060 samples, 0.06%) + + + +__page_cache_release (1,727,547 samples, 0.02%) + + + +__libc_openat64 (1,014,603 samples, 0.01%) + + + +__libc_start_main_impl (6,715,787,788 samples, 61.85%) +__libc_start_main_impl + + +do_anonymous_page (13,645,186 samples, 0.13%) + + + +path_openat (1,013,775 samples, 0.01%) + + + +dsacache::Cache::AllocOnNode (1,427,696,755 samples, 13.15%) +dsacache::Cache::Al.. + + +accfg_wq_get_first (8,620,835 samples, 0.08%) + + + +dml::core::hardware_device::submit (13,296,663 samples, 0.12%) + + + +do_filp_open (1,013,775 samples, 0.01%) + + + +up_read (1,064,042 samples, 0.01%) + + + +irqentry_exit_to_user_mode (1,726,993 samples, 0.02%) + + + +asm_exc_page_fault (1,330,852,392 samples, 12.26%) +asm_exc_page_fault + + +folio_add_lru (3,945,551 samples, 0.04%) + + + +count_memcg_events.constprop.0 (1,834,351 samples, 0.02%) + + + +__GI___libc_read (957,900 samples, 0.01%) + + + +check_preemption_disabled (1,564,788 samples, 0.01%) + + + +lru_gen_add_folio (2,271,523 samples, 0.02%) + + + +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (30,229,702 samples, 0.28%) + + + +auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (30,229,702 samples, 0.28%) + + + +entry_SYSCALL_64_after_hwframe (7,832,406 samples, 0.07%) + + + +__sysfs_device_parse (8,620,835 samples, 0.08%) + + + +clone3 (3,537,683,340 samples, 32.58%) +clone3 + + +__sysfs_device_parse (2,409,723 samples, 0.02%) + + + +mprotect_fixup (29,370,212 samples, 0.27%) + + + +mutex_lock (1,981,537 samples, 0.02%) + + + +sysmalloc (15,576,431 samples, 0.14%) + + + +__GI__IO_doallocbuf (19,515,368 samples, 0.18%) + + + +tick_sched_timer (8,995,452 samples, 0.08%) + + + +__x64_sys_mprotect (12,999,915 samples, 0.12%) + + + +__fopen_internal (10,781,028 samples, 0.10%) + + + +__memset (1,731,249 samples, 0.02%) + + + +dml::core::dispatcher::hw_dispatcher::~hw_dispatcher (1,665,609 samples, 0.02%) + + + +_int_memalign (30,229,702 samples, 0.28%) + + + +__memcg_kmem_charge_page (5,802,387 samples, 0.05%) + + + +do_futex (1,709,662 samples, 0.02%) + + + +grow_heap (13,858,960 samples, 0.13%) + + + +lru_add_fn (1,728,947 samples, 0.02%) + + + +pte_alloc_one (1,729,251 samples, 0.02%) + + + +try_charge_memcg (3,358,641 samples, 0.03%) + + + +__mod_node_page_state (1,246,575 samples, 0.01%) + + + +intel_invalidate_range (2,478,462 samples, 0.02%) + + + +perf_event_mmap (11,662,927 samples, 0.11%) + + + +operator new (30,229,702 samples, 0.28%) + + + +do_exit (1,418,159 samples, 0.01%) + + + +__mod_memcg_lruvec_state (1,134,175 samples, 0.01%) + + + +__GI___libc_read (11,363,394 samples, 0.10%) + + + +_dl_start_user (1,196,824 samples, 0.01%) + + + +__futex_abstimed_wait_common64 (1,709,662 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (23,194,745 samples, 0.21%) + + + +exc_page_fault (1,852,943,584 samples, 17.07%) +exc_page_fault + + +__x64_sys_get_mempolicy (3,456,757 samples, 0.03%) + + + +dml::core::dispatcher::hw_dispatcher::hw_dispatcher (12,436,625 samples, 0.11%) + + + +arch_get_unmapped_area_topdown (1,099,791 samples, 0.01%) + + + +__mem_cgroup_charge (4,164,584 samples, 0.04%) + + + +perf_event_mmap (5,486,207 samples, 0.05%) + + + +__GI___close (2,038,073 samples, 0.02%) + + + +charge_memcg (3,957,434 samples, 0.04%) + + + +sysvec_apic_timer_interrupt (1,285,604 samples, 0.01%) + + + +qi_flush_piotlb (2,478,462 samples, 0.02%) + + + +qi_flush_dev_iotlb_pasid (18,156,520 samples, 0.17%) + + + +do_madvise (3,340,721 samples, 0.03%) + + + +ksys_read (957,900 samples, 0.01%) + + + +dml::detail::ml::impl::hardware::submit (13,296,663 samples, 0.12%) + + + +clear_page_erms (1,266,328 samples, 0.01%) + + + +__GI_munmap (16,406,617 samples, 0.15%) + + + +do_group_exit (1,418,159 samples, 0.01%) + + + +entry_SYSCALL_64_after_hwframe (16,406,617 samples, 0.15%) + + + +void std::vector<std::thread, std::allocator<std::thread> >::_M_realloc_insert<void (2,576,245 samples, 0.02%) + + + +device_parse (2,409,723 samples, 0.02%) + + + +void std::allocator_traits<std::allocator<std::thread> >::construct<std::thread, void (2,576,245 samples, 0.02%) + + + +task_work_run (1,526,259 samples, 0.01%) + + + +lru_gen_add_folio (1,728,947 samples, 0.02%) + + + +mas_store_gfp (1,730,304 samples, 0.02%) + + + +do_syscall_64 (12,999,915 samples, 0.12%) + + + +dml::core::dispatcher::hw_dispatcher::get_instance (12,436,625 samples, 0.11%) + + + +get_unmapped_area (1,099,791 samples, 0.01%) + + + +zap_page_range_single (2,478,462 samples, 0.02%) + + + +do_syscall_64 (1,709,662 samples, 0.02%) + + + +do_huge_pmd_anonymous_page (1,293,979,138 samples, 11.92%) +do_huge_pmd_anony.. + + +task_work_run (1,726,993 samples, 0.02%) + + + +perf_iterate_sb.constprop.0 (5,486,207 samples, 0.05%) + + + +try_charge_memcg (3,282,805 samples, 0.03%) + + + +__folio_alloc (867,087,699 samples, 7.99%) +__folio_alloc + + +openat (2,539,730 samples, 0.02%) + + + +__mmput (1,418,159 samples, 0.01%) + + + +__handle_mm_fault (1,852,426,101 samples, 17.06%) +__handle_mm_fault + + +asm_sysvec_apic_timer_interrupt (10,722,445 samples, 0.10%) + + + +__GI___mmap64 (23,194,745 samples, 0.21%) + + + +do_syscall_64 (6,922,986 samples, 0.06%) + + + +qi_submit_sync (6,025,194 samples, 0.06%) + + + +tlb_finish_mmu (13,813,787 samples, 0.13%) + + + +entry_SYSCALL_64_after_hwframe (2,539,730 samples, 0.02%) + + + +clear_huge_page (3,619,034 samples, 0.03%) + + + +entry_SYSCALL_64_after_hwframe (1,709,662 samples, 0.02%) + + + +page_counter_try_charge (1,715,991 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (1,418,159 samples, 0.01%) + + + +clear_page_erms (1,291,549,968 samples, 11.90%) +clear_page_erms + + +do_syscall_64 (1,014,603 samples, 0.01%) + + + +malloc_consolidate (963,610 samples, 0.01%) + + + +tick_sched_handle (1,300,420 samples, 0.01%) + + + +scheduler_tick (8,995,452 samples, 0.08%) + + + +lru_add_fn (2,271,523 samples, 0.02%) + + + +entry_SYSCALL_64_after_hwframe (6,922,986 samples, 0.06%) + + + +syscall (3,382,035 samples, 0.03%) + + + +perf_adjust_freq_unthr_context (1,365,948 samples, 0.01%) + + + +__mod_node_page_state (1,728,947 samples, 0.02%) + + + +seq_read_iter (10,243,352 samples, 0.09%) + + + +seq_read_iter (957,900 samples, 0.01%) + + + +std::unordered_map<unsigned char*, dsacache::CacheData, std::hash<unsigned char*>, std::equal_to<unsigned char*>, std::allocator<std::pair<unsigned char* const, dsacache::CacheData> > >::clear (124,499,915 samples, 1.15%) + + + +add_group (2,409,723 samples, 0.02%) + + + +__handle_mm_fault (1,309,922,715 samples, 12.06%) +__handle_mm_fault + + +get_page_from_freelist (1,507,729,794 samples, 13.89%) +get_page_from_freelist + + +memcg_check_events (1,590,395 samples, 0.01%) + + + +perf_iterate_ctx (1,544,977 samples, 0.01%) + + + +_int_malloc (30,229,702 samples, 0.28%) + + + +__do_sys_clone3 (6,922,986 samples, 0.06%) + + + +__GI_mprotect (13,858,960 samples, 0.13%) + + + +Aggregation<unsigned long, Sum, (1,278,494,713 samples, 11.78%) +Aggregation<unsig.. + + +unmap_region (16,406,617 samples, 0.15%) + + + +perf_event_init_task (6,922,986 samples, 0.06%) + + + +__x64_sys_openat (2,028,052 samples, 0.02%) + + + +