diff --git a/qdp_project/evaluation-results/outofcacheallocation/perf.svg b/qdp_project/evaluation-results/outofcacheallocation/perf.svg index 3371d4d..2732db4 100644 --- a/qdp_project/evaluation-results/outofcacheallocation/perf.svg +++ b/qdp_project/evaluation-results/outofcacheallocation/perf.svg @@ -1,6 +1,6 @@ - + @@ -421,2021 +421,1965 @@ } ]]> - + Flame Graph - + Reset Zoom Search ic - + -qi_submit_sync (1,721,543 samples, 0.01%) - +release_pages (52,708,279 samples, 0.10%) + -lru_gen_add_folio (61,919,923 samples, 0.50%) - +aggr_j (6,104,856,818 samples, 11.92%) +aggr_j -tick_sched_timer (3,160,021 samples, 0.03%) - - - -mtree_range_walk (24,100,177 samples, 0.19%) - - - -do_vmi_munmap (364,334,473 samples, 2.94%) -do.. - - -__mem_cgroup_uncharge_list (10,319,468 samples, 0.08%) - - - -_raw_spin_lock_irqsave (3,440,417 samples, 0.03%) - - - -__GI___libc_malloc (1,371,122 samples, 0.01%) - - - -__alloc_pages (3,440,812 samples, 0.03%) - - - -sysvec_apic_timer_interrupt (3,976,134 samples, 0.03%) - - - -__mod_memcg_lruvec_state (6,685,883 samples, 0.05%) - +dml::core::dispatcher::hw_device::initialize_new_device (10,597,924 samples, 0.02%) + -free_pcppages_bulk (16,349,754 samples, 0.13%) - +cgroup_rstat_updated (260,795,386 samples, 0.51%) + -qi_submit_sync (3,439,253 samples, 0.03%) - +free_unref_page_prepare (11,193,897 samples, 0.02%) + -check_preemption_disabled (1,541,954 samples, 0.01%) - +error_entry (19,445,388 samples, 0.04%) + -error_entry (2,056,951 samples, 0.02%) - +__x64_sys_openat (4,893,850 samples, 0.01%) + -task_work_run (2,249,086 samples, 0.02%) - +memcg_check_events (2,082,895,253 samples, 4.07%) +memc.. -mas_walk (5,990,206 samples, 0.05%) - +__get_vma_policy (6,052,375 samples, 0.01%) + -update_process_times (3,906,120 samples, 0.03%) - +policy_node (11,237,982 samples, 0.02%) + -page_remove_rmap (57,590,246 samples, 0.46%) - +__handle_mm_fault (1,740,573,708 samples, 3.40%) +__h.. -intel_invalidate_range (5,992,675 samples, 0.05%) - +up_read (54,454,946 samples, 0.11%) + -check_preemption_disabled (2,573,156 samples, 0.02%) - +mmap_region (12,738,596 samples, 0.02%) + -perf_event_task_tick (3,929,255 samples, 0.03%) - +__mod_memcg_lruvec_state (18,251,466 samples, 0.04%) + -inc_mm_counter (14,540,292 samples, 0.12%) - +kthread_blkcg (5,187,501 samples, 0.01%) + -__vm_munmap (364,334,473 samples, 2.94%) -__.. +__pte_alloc (4,825,833 samples, 0.01%) + -__irqentry_text_end (1,724,097 samples, 0.01%) - +__mem_cgroup_uncharge_list (24,144,483 samples, 0.05%) + -__rcu_read_unlock (1,719,797 samples, 0.01%) - +folio_add_lru (612,805,660 samples, 1.20%) + -hrtimer_interrupt (23,218,296 samples, 0.19%) - +_raw_spin_unlock (8,778,490 samples, 0.02%) + -__this_cpu_preempt_check (30,979,572 samples, 0.25%) - +do_user_addr_fault (38,214,263 samples, 0.07%) + -set_pte (2,583,753 samples, 0.02%) - +sysvec_apic_timer_interrupt (7,679,459 samples, 0.02%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (2,583,136,896 samples, 20.81%) -unsigned long std::uniform_int_d.. +__rcu_read_lock (24,197,159 samples, 0.05%) + -folio_mapping (3,444,420 samples, 0.03%) - +check_preemption_disabled (6,898,806 samples, 0.01%) + -__sysvec_apic_timer_interrupt (3,943,095 samples, 0.03%) - +qi_submit_sync (42,245,564 samples, 0.08%) + -lru_gen_del_folio.constprop.0 (72,070,497 samples, 0.58%) - +check_preemption_disabled (19,006,854 samples, 0.04%) + -do_syscall_64 (1,901,491 samples, 0.02%) - +check_preemption_disabled (156,498,969 samples, 0.31%) + -vma_alloc_folio (102,855,112 samples, 0.83%) - +get_mem_cgroup_from_mm (115,813,924 samples, 0.23%) + -folio_batch_move_lru (111,817,407 samples, 0.90%) - +__next_zones_zonelist (40,627,227 samples, 0.08%) + -do_syscall_64 (364,334,473 samples, 2.94%) -do.. +__alloc_pages (374,402,804 samples, 0.73%) + -aggr_j (1,367,823,130 samples, 11.02%) -aggr_j +dsacache::Cache::GetCacheNode (6,787,704 samples, 0.01%) + -free_pages_and_swap_cache (2,577,335 samples, 0.02%) - +dsacache::Cache::GetCacheNode (19,011,999 samples, 0.04%) + -_mm512_stream_load_si512 (192,782,902 samples, 1.55%) - +change_protection (201,181,727 samples, 0.39%) + -asm_sysvec_apic_timer_interrupt (23,218,296 samples, 0.19%) - +check_preemption_disabled (11,365,460 samples, 0.02%) + -debug_smp_processor_id (3,420,454 samples, 0.03%) - +__this_cpu_preempt_check (6,897,820 samples, 0.01%) + -__mem_cgroup_charge (172,570,591 samples, 1.39%) - +__folio_throttle_swaprate (38,895,583 samples, 0.08%) + -try_charge_memcg (293,499,149 samples, 2.36%) -t.. +__this_cpu_preempt_check (12,203,410 samples, 0.02%) + -do_user_addr_fault (2,184,156,136 samples, 17.60%) -do_user_addr_fault +perf_iterate_ctx (7,917,062 samples, 0.02%) + -__this_cpu_preempt_check (2,576,672 samples, 0.02%) - +vm_normal_page (16,383,619 samples, 0.03%) + -qi_flush_dev_iotlb_pasid (2,553,422 samples, 0.02%) - +__rcu_read_lock (5,184,268 samples, 0.01%) + -asm_sysvec_apic_timer_interrupt (3,906,120 samples, 0.03%) - +vma_alloc_folio (737,893,285 samples, 1.44%) + -mem_cgroup_charge_statistics (58,297,817 samples, 0.47%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (5,186,627 samples, 0.01%) + -charge_memcg (157,574,324 samples, 1.27%) - +check_preemption_disabled (12,072,500 samples, 0.02%) + -cgroup_rstat_updated (5,660,087 samples, 0.05%) - +__hrtimer_run_queues (16,533,531 samples, 0.03%) + -__do_sys_clone3 (12,026,411 samples, 0.10%) - +__list_del_entry_valid (6,896,516 samples, 0.01%) + -task_work_run (1,725,552 samples, 0.01%) - +__GI___mmap64 (14,975,543 samples, 0.03%) + -__list_add_valid (3,440,861 samples, 0.03%) - +__mod_lruvec_state (28,519,367 samples, 0.06%) + -do_syscall_64 (48,297,259 samples, 0.39%) - +charge_memcg (5,532,425,220 samples, 10.81%) +charge_memcg -__mod_memcg_lruvec_state (18,926,700 samples, 0.15%) - +syscall (10,248,023 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (4,355,769 samples, 0.04%) - +Aggregation<unsigned long, Sum, (6,079,728,229 samples, 11.88%) +Aggregation<unsig.. -error_entry (18,074,532 samples, 0.15%) - +check_preemption_disabled (24,263,995 samples, 0.05%) + -preempt_count_sub (1,715,936 samples, 0.01%) - +asm_exc_page_fault (4,003,098,524 samples, 7.82%) +asm_exc_pag.. -lru_add_fn (47,336,565 samples, 0.38%) - +__folio_throttle_swaprate (22,634,579 samples, 0.04%) + -free_unref_page_commit (1,721,876 samples, 0.01%) - +p4d_offset (12,965,965 samples, 0.03%) + -debug_smp_processor_id (1,881,431 samples, 0.02%) - +grow_heap (209,389,327 samples, 0.41%) + -_raw_spin_trylock (11,170,612 samples, 0.09%) - +blk_cgroup_congested (20,052,243 samples, 0.04%) + -__alloc_pages (129,835,440 samples, 1.05%) - +do_filp_open (4,455,340 samples, 0.01%) + -all (12,411,866,374 samples, 100%) - +numa_bitmask_alloc (4,453,868 samples, 0.01%) + -__rmqueue_pcplist (2,580,511 samples, 0.02%) - +lru_gen_del_folio.constprop.0 (257,002,243 samples, 0.50%) + -do_mprotect_pkey (48,297,259 samples, 0.39%) - +free_unref_page_list (232,831,460 samples, 0.45%) + -__mod_memcg_lruvec_state (8,598,535 samples, 0.07%) - +qi_flush_dev_iotlb_pasid (27,600,815 samples, 0.05%) + -scheduler_tick (3,906,120 samples, 0.03%) - +__this_cpu_preempt_check (5,174,961 samples, 0.01%) + -__mod_node_page_state (3,447,380 samples, 0.03%) - +__this_cpu_preempt_check (9,488,485 samples, 0.02%) + -__rcu_read_unlock (1,718,019 samples, 0.01%) - +mprotect_fixup (208,874,517 samples, 0.41%) + -tick_sched_timer (23,218,296 samples, 0.19%) - +QDPBench (51,196,000,579 samples, 100.00%) +QDPBench -p4d_offset (3,439,714 samples, 0.03%) - +inc_mm_counter (85,556,117 samples, 0.17%) + -__alloc_file (1,947,956 samples, 0.02%) - +check_preemption_disabled (347,960,254 samples, 0.68%) + -lock_vma_under_rcu (20,490,564 samples, 0.17%) - +std::__new_allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (214,949,728 samples, 0.42%) + -exc_page_fault (12,653,436 samples, 0.10%) - +dml::core::dispatcher::hw_dispatcher::initialize_hw (11,868,621 samples, 0.02%) + -tick_sched_timer (3,906,120 samples, 0.03%) - +advise_stack_range (5,811,133 samples, 0.01%) + -unmap_region (364,334,473 samples, 2.94%) -un.. +check_preemption_disabled (6,193,135 samples, 0.01%) + -qi_submit_sync (2,553,422 samples, 0.02%) - +exc_page_fault (8,328,027,484 samples, 16.27%) +exc_page_fault -check_preemption_disabled (14,575,733 samples, 0.12%) - +_mm512_mask_add_epi64 (1,637,756,210 samples, 3.20%) +_mm.. -preempt_count_add (6,022,766 samples, 0.05%) - +__handle_mm_fault (7,789,560,264 samples, 15.22%) +__handle_mm_fault -path_openat (3,516,552 samples, 0.03%) - +entry_SYSCALL_64_after_hwframe (4,439,475 samples, 0.01%) + -access_error (5,319,263 samples, 0.04%) - +exit_to_user_mode_prepare (84,694,281 samples, 0.17%) + -__rmqueue_pcplist (29,235,326 samples, 0.24%) - +charge_memcg (565,367,043 samples, 1.10%) + -perf_event_init_task (9,447,890 samples, 0.08%) - +__this_cpu_preempt_check (6,027,347 samples, 0.01%) + -check_preemption_disabled (1,721,448 samples, 0.01%) - +__x64_sys_get_mempolicy (7,453,000 samples, 0.01%) + -scheduler_tick (2,551,340 samples, 0.02%) - +cgroup_rstat_updated (10,348,548 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (12,026,411 samples, 0.10%) - +__do_sys_clone3 (12,937,992 samples, 0.03%) + -qi_flush_piotlb (2,233,100 samples, 0.02%) - +irqentry_exit_to_user_mode (41,316,055 samples, 0.08%) + -_raw_spin_trylock (8,313,791 samples, 0.07%) - +free_swap_cache (12,071,475 samples, 0.02%) + -__this_cpu_preempt_check (7,201,711 samples, 0.06%) - +tick_sched_handle (7,679,459 samples, 0.02%) + -exit_to_user_mode_prepare (2,249,086 samples, 0.02%) - +cgroup_rstat_updated (6,916,629 samples, 0.01%) + -__mod_lruvec_page_state (1,372,559 samples, 0.01%) - +mem_cgroup_charge_statistics (213,849,573 samples, 0.42%) + -qi_submit_sync (25,549,617 samples, 0.21%) - +dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (214,949,728 samples, 0.42%) + -__rcu_read_lock (3,437,970 samples, 0.03%) - +handle_mm_fault (29,947,547 samples, 0.06%) + -intel_invalidate_range (10,325,016 samples, 0.08%) - +__mod_memcg_lruvec_state (52,719,383 samples, 0.10%) + -get_page_from_freelist (2,580,511 samples, 0.02%) - +__irqentry_text_end (7,779,280 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (364,334,473 samples, 2.94%) -en.. +__mod_zone_page_state (31,978,239 samples, 0.06%) + -__mod_memcg_lruvec_state (13,762,194 samples, 0.11%) - +_raw_spin_trylock (53,586,323 samples, 0.10%) + -check_preemption_disabled (11,868,516 samples, 0.10%) - +get_mem_cgroup_from_mm (35,627,896 samples, 0.07%) + -__mod_memcg_lruvec_state (6,023,074 samples, 0.05%) - +lru_gen_add_folio (237,708,968 samples, 0.46%) + -kmem_cache_alloc_node (2,566,038 samples, 0.02%) - +asm_sysvec_apic_timer_interrupt (17,644,723 samples, 0.03%) + -lock_vma_under_rcu (54,211,570 samples, 0.44%) - +perf_adjust_freq_unthr_context (13,548,659 samples, 0.03%) + -folio_batch_move_lru (57,119,887 samples, 0.46%) - +Sum<unsigned long>::simd_agg (1,637,756,210 samples, 3.20%) +Sum.. -preempt_count_add (3,443,046 samples, 0.03%) - +device_parse (8,667,141 samples, 0.02%) + -cgroup_rstat_updated (2,058,189 samples, 0.02%) - +__rcu_read_unlock (6,054,769 samples, 0.01%) + -__mem_cgroup_charge (1,515,220,915 samples, 12.21%) -__mem_cgroup_charge +count_memcg_events.constprop.0 (59,357,552 samples, 0.12%) + -__list_del_entry_valid (1,720,143 samples, 0.01%) - +cgroup_rstat_updated (9,488,492 samples, 0.02%) + -__mem_cgroup_charge (1,545,503 samples, 0.01%) - +kmem_cache_alloc_node (5,173,381 samples, 0.01%) + -handle_mm_fault (11,527,517 samples, 0.09%) - +__rcu_read_unlock (8,769,197 samples, 0.02%) + -__count_memcg_events (504,197,789 samples, 4.06%) -__co.. +pte_alloc_one (10,374,798 samples, 0.02%) + -hrtimer_interrupt (3,929,255 samples, 0.03%) - +main (37,159,085,518 samples, 72.58%) +main -zap_page_range_single (3,907,002 samples, 0.03%) - +do_anonymous_page (28,572,888 samples, 0.06%) + -check_preemption_disabled (1,719,936 samples, 0.01%) - +__hrtimer_run_queues (7,679,459 samples, 0.02%) + -__mod_node_page_state (7,731,576 samples, 0.06%) - +__this_cpu_preempt_check (129,647,384 samples, 0.25%) + -__this_cpu_preempt_check (1,723,398 samples, 0.01%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (5,186,627 samples, 0.01%) + -folio_add_lru_vma (1,720,759 samples, 0.01%) - +check_preemption_disabled (10,346,755 samples, 0.02%) + -uncharge_folio (9,458,702 samples, 0.08%) - +policy_nodemask (51,791,473 samples, 0.10%) + -uncharge_folio (1,720,711 samples, 0.01%) - +__rcu_read_unlock (9,312,892 samples, 0.02%) + -irqentry_exit_to_user_mode (18,702,075 samples, 0.15%) - +mod_lruvec_page_state.constprop.0 (5,186,872 samples, 0.01%) + -__irqentry_text_end (3,439,257 samples, 0.03%) - +__this_cpu_preempt_check (16,355,591 samples, 0.03%) + -copy_process (10,306,337 samples, 0.08%) - +folio_add_new_anon_rmap (120,552,539 samples, 0.24%) + -entry_SYSCALL_64_after_hwframe (2,757,577 samples, 0.02%) - +__mod_lruvec_state (24,080,793 samples, 0.05%) + -__bitmap_intersects (1,720,047 samples, 0.01%) - +qi_flush_dev_iotlb_pasid (27,592,601 samples, 0.05%) + -__list_add_valid (3,439,621 samples, 0.03%) - +do_syscall_64 (4,439,475 samples, 0.01%) + -__next_zones_zonelist (6,884,192 samples, 0.06%) - +intel_invalidate_range (5,811,133 samples, 0.01%) + -blk_cgroup_congested (11,188,106 samples, 0.09%) - +__tlb_remove_page_size (7,761,283 samples, 0.02%) + -preempt_count_add (1,720,359 samples, 0.01%) - +pud_val (12,969,218 samples, 0.03%) + -obj_cgroup_charge (1,250,263 samples, 0.01%) - +free_pages_and_swap_cache (12,071,475 samples, 0.02%) + -pmd_val (2,581,534 samples, 0.02%) - +release_pages (24,481,735 samples, 0.05%) + -do_anonymous_page (10,761,753 samples, 0.09%) - +__mod_lruvec_page_state (82,866,016 samples, 0.16%) + -check_preemption_disabled (4,977,565 samples, 0.04%) - +debug_smp_processor_id (76,082,188 samples, 0.15%) + -preempt_count_add (1,373,927 samples, 0.01%) - +qi_submit_sync (27,592,601 samples, 0.05%) + -_raw_spin_lock (14,624,277 samples, 0.12%) - +fpregs_assert_state_consistent (21,009,124 samples, 0.04%) + -__mod_lruvec_page_state (37,861,524 samples, 0.31%) - +check_preemption_disabled (19,865,367 samples, 0.04%) + -Filter<unsigned long, LT, (954,595,693 samples, 7.69%) -Filter<uns.. +__next_zones_zonelist (59,712,839 samples, 0.12%) + -__list_del_entry_valid (1,720,847 samples, 0.01%) - +sum_check (7,506,033,272 samples, 14.66%) +sum_check -do_syscall_64 (2,696,024 samples, 0.02%) - +sync_regs (716,147,497 samples, 1.40%) + -qi_submit_sync (6,883,789 samples, 0.06%) - +entry_SYSCALL_64_after_hwframe (1,446,084,556 samples, 2.82%) +en.. -__sysvec_apic_timer_interrupt (3,929,255 samples, 0.03%) - +debug_smp_processor_id (6,049,220 samples, 0.01%) + -tlb_batch_pages_flush (81,748,300 samples, 0.66%) - +__handle_mm_fault (29,086,417 samples, 0.06%) + -__mod_zone_page_state (6,882,656 samples, 0.06%) - - - -vfs_read (1,723,340 samples, 0.01%) - - - -entry_SYSCALL_64_after_hwframe (2,696,024 samples, 0.02%) - - - -__rcu_read_lock (1,718,910 samples, 0.01%) - - - -check_preemption_disabled (63,667,847 samples, 0.51%) - - - -qi_flush_piotlb (25,549,617 samples, 0.21%) - - - -unmap_page_range (252,470,009 samples, 2.03%) -u.. +do_syscall_64 (4,893,850 samples, 0.01%) + -release_pages (80,026,979 samples, 0.64%) - +in_lock_functions (7,782,211 samples, 0.02%) + -check_preemption_disabled (2,579,616 samples, 0.02%) - +perf_iterate_sb.constprop.0 (7,917,062 samples, 0.02%) + -qi_flush_piotlb (17,208,829 samples, 0.14%) - +qi_submit_sync (13,797,540 samples, 0.03%) + -do_vmi_align_munmap (364,334,473 samples, 2.94%) -do.. +mtree_range_walk (36,506,379 samples, 0.07%) + -void fill_mt<unsigned long> (5,953,509,698 samples, 47.97%) -void fill_mt<unsigned long> +__rmqueue_pcplist (170,232,441 samples, 0.33%) + -__x64_sys_munmap (364,334,473 samples, 2.94%) -__.. +Vector_Loader<unsigned long, (744,562,723 samples, 1.45%) + -clear_page_erms (1,702,838 samples, 0.01%) - +pmd_page_vaddr (7,779,371 samples, 0.02%) + -preempt_count_sub (1,723,108 samples, 0.01%) - +__mod_lruvec_page_state (134,523,882 samples, 0.26%) + -perf_event_task_tick (1,968,706 samples, 0.02%) - +memcg_check_events (234,546,968 samples, 0.46%) + -__alloc_pages (1,702,838 samples, 0.01%) - +charge_memcg (7,480,344 samples, 0.01%) + -__sysvec_apic_timer_interrupt (23,218,296 samples, 0.19%) - +entry_SYSCALL_64_after_hwframe (12,937,992 samples, 0.03%) + -percpu_counter_add_batch (30,662,619 samples, 0.25%) - +__GI___mmap64 (14,975,543 samples, 0.03%) + -check_preemption_disabled (4,302,124 samples, 0.03%) - +do_mmap (13,252,429 samples, 0.03%) + -do_syscall_64 (4,508,436 samples, 0.04%) - +__this_cpu_preempt_check (4,807,293 samples, 0.01%) + -scheduler_tick (23,218,296 samples, 0.19%) - +__slab_alloc.isra.0 (5,173,381 samples, 0.01%) + -__rcu_read_lock (2,573,313 samples, 0.02%) - +check_preemption_disabled (13,828,299 samples, 0.03%) + -start_thread (2,323,402,749 samples, 18.72%) -start_thread +__mmu_notifier_invalidate_range_end (201,181,727 samples, 0.39%) + -down_read_trylock (11,483,273 samples, 0.09%) - +__rcu_read_lock (6,913,047 samples, 0.01%) + -handle_mm_fault (472,491,587 samples, 3.81%) -hand.. +perf_event_mmap (5,036,520 samples, 0.01%) + -__next_zones_zonelist (15,788,726 samples, 0.13%) - +intel_invalidate_range (69,838,165 samples, 0.14%) + -__list_del_entry_valid (1,719,055 samples, 0.01%) - +dml::detail::ml::impl::hardware::submit (12,752,896 samples, 0.02%) + -get_mem_cgroup_from_mm (38,723,479 samples, 0.31%) - +__list_add_valid (20,737,220 samples, 0.04%) + -__this_cpu_preempt_check (1,373,795 samples, 0.01%) - +__mmu_notifier_invalidate_range (18,972,896 samples, 0.04%) + -entry_SYSCALL_64_after_hwframe (1,901,491 samples, 0.02%) - +entry_SYSCALL_64_after_hwframe (4,893,850 samples, 0.01%) + -error_entry (1,889,126 samples, 0.02%) - +qi_flush_dev_iotlb_pasid (90,923,354 samples, 0.18%) + -___slab_alloc (2,566,038 samples, 0.02%) - +__alloc_pages (652,324,426 samples, 1.27%) + -tick_sched_handle (23,218,296 samples, 0.19%) - +__free_one_page (58,629,895 samples, 0.11%) + -__mod_node_page_state (3,091,882 samples, 0.02%) - +__rcu_read_unlock (5,186,301 samples, 0.01%) + -check_preemption_disabled (6,010,707 samples, 0.05%) - +_raw_spin_unlock (13,824,435 samples, 0.03%) + -page_counter_try_charge (20,650,912 samples, 0.17%) - +dml::detail::ml::buffer<std::allocator<unsigned char>, dml::detail::descriptor, dml::detail::completion_record>::buffer (214,949,728 samples, 0.42%) + -QDPBench (12,410,589,482 samples, 99.99%) -QDPBench +check_preemption_disabled (14,661,066 samples, 0.03%) + -do_anonymous_page (1,964,773,360 samples, 15.83%) -do_anonymous_page +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (7,039,361,847 samples, 13.75%) +unsigned int std::un.. -pmd_page_vaddr (2,581,169 samples, 0.02%) - +perf_event_task_tick (13,548,659 samples, 0.03%) + -check_preemption_disabled (3,602,669 samples, 0.03%) - +__list_add_valid (6,899,733 samples, 0.01%) + -pmd_val (1,373,811 samples, 0.01%) - +pmd_pfn (8,640,684 samples, 0.02%) + -try_charge_memcg (26,086,237 samples, 0.21%) - +intel_invalidate_range (55,187,637 samples, 0.11%) + -fpregs_assert_state_consistent (11,163,115 samples, 0.09%) - +__sysvec_apic_timer_interrupt (16,533,531 samples, 0.03%) + -preempt_count_add (2,574,503 samples, 0.02%) - +free_unref_page_commit (26,737,180 samples, 0.05%) + -free_unref_page_prepare (6,873,448 samples, 0.06%) - +update_process_times (7,679,459 samples, 0.02%) + -sync_regs (191,005,331 samples, 1.54%) - +sync_regs (372,014,368 samples, 0.73%) + -__count_memcg_events (34,391,485 samples, 0.28%) - +check_preemption_disabled (35,439,582 samples, 0.07%) + -__list_del_entry_valid (3,603,255 samples, 0.03%) - +debug_smp_processor_id (7,778,582 samples, 0.02%) + -free_unref_page_prepare (4,303,489 samples, 0.03%) - +lock_vma_under_rcu (5,682,124 samples, 0.01%) + -do_syscall_64 (1,669,914 samples, 0.01%) - +unmap_page_range (1,005,492,248 samples, 1.96%) +u.. -check_preemption_disabled (96,367,288 samples, 0.78%) - +auto dml::detail::submit<dml::hardware, dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >, dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (228,473,295 samples, 0.45%) + -exit_to_user_mode_prepare (16,126,769 samples, 0.13%) - +__rcu_read_unlock (11,236,347 samples, 0.02%) + -mas_walk (27,542,363 samples, 0.22%) - +dsacache::Cache::ExecuteCopy (228,473,295 samples, 0.45%) + -lru_gen_del_folio.constprop.0 (37,000,359 samples, 0.30%) - +intel_invalidate_range (201,181,727 samples, 0.39%) + -__free_one_page (15,489,878 samples, 0.12%) - +qi_submit_sync (90,923,354 samples, 0.18%) + -perf_adjust_freq_unthr_context (1,968,706 samples, 0.02%) - +__this_cpu_preempt_check (5,333,942 samples, 0.01%) + -preempt_count_sub (1,720,136 samples, 0.01%) - +__free_one_page (122,450,173 samples, 0.24%) + -__rcu_read_unlock (5,169,739 samples, 0.04%) - +folio_add_new_anon_rmap (233,995,661 samples, 0.46%) + -do_anonymous_page (419,559,053 samples, 3.38%) -do_.. +do_mprotect_pkey (209,389,327 samples, 0.41%) + -check_preemption_disabled (1,884,966 samples, 0.02%) - +policy_nodemask (32,844,665 samples, 0.06%) + -__this_cpu_preempt_check (2,577,548 samples, 0.02%) - +preempt_count_add (6,537,320 samples, 0.01%) + -check_preemption_disabled (1,723,308 samples, 0.01%) - +perf_adjust_freq_unthr_context (7,679,459 samples, 0.02%) + -_raw_spin_lock (10,296,111 samples, 0.08%) - +preempt_count_add (14,697,153 samples, 0.03%) + -__x64_sys_openat (4,508,436 samples, 0.04%) - +all (51,196,000,582 samples, 100%) + -alloc_empty_file (1,947,956 samples, 0.02%) - +__list_add_valid (5,175,618 samples, 0.01%) + -preempt_count_add (3,440,880 samples, 0.03%) - +dml::core::dispatcher::hw_dispatcher::hw_dispatcher (11,868,621 samples, 0.02%) + -do_user_addr_fault (12,653,436 samples, 0.10%) - +__irqentry_text_end (8,281,604 samples, 0.02%) + -sysvec_apic_timer_interrupt (23,218,296 samples, 0.19%) - +lru_gen_add_folio (128,213,280 samples, 0.25%) + -cgroup_rstat_updated (2,581,575 samples, 0.02%) - +inc_mm_counter (54,606,929 samples, 0.11%) + -lru_gen_add_folio (37,044,181 samples, 0.30%) - +mtree_range_walk (63,113,187 samples, 0.12%) + -__mmu_notifier_invalidate_range_end (47,225,879 samples, 0.38%) - +free_pcppages_bulk (149,181,975 samples, 0.29%) + -__libc_start_call_main (9,403,494,681 samples, 75.76%) -__libc_start_call_main +__x64_sys_madvise (5,811,133 samples, 0.01%) + -__kmem_cache_alloc_node (1,250,263 samples, 0.01%) - +clear_page_erms (81,251,197 samples, 0.16%) + -__this_cpu_preempt_check (8,604,881 samples, 0.07%) - +irqentry_exit_to_user_mode (90,739,272 samples, 0.18%) + -intel_invalidate_range (3,907,002 samples, 0.03%) - +folio_add_lru (7,982,057 samples, 0.02%) + -do_syscall_64 (12,026,411 samples, 0.10%) - +__mod_lruvec_state (65,677,627 samples, 0.13%) + -__this_cpu_preempt_check (1,718,539 samples, 0.01%) - +__list_del_entry_valid (12,474,939 samples, 0.02%) + -vma_alloc_folio (152,198,154 samples, 1.23%) - +auto dml::detail::ml::make_mem_move_task<std::allocator<unsigned char> > (214,949,728 samples, 0.42%) + -__rmqueue_pcplist (19,213,101 samples, 0.15%) - +__count_memcg_events (49,199,350 samples, 0.10%) + -perf_event_task_tick (23,218,296 samples, 0.19%) - +release_pages (300,901,400 samples, 0.59%) + -clear_page_erms (9,608,999 samples, 0.08%) - +Filter<unsigned long, LT, (5,218,225,108 samples, 10.19%) +Filter<unsigne.. -unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (589,743,128 samples, 4.75%) -unsig.. +check_preemption_disabled (1,303,282,835 samples, 2.55%) +ch.. -__libc_openat64 (4,932,701 samples, 0.04%) - +wqs_init (8,667,141 samples, 0.02%) + -__rcu_read_unlock (1,719,354 samples, 0.01%) - +numa_alloc_onnode (18,422,041 samples, 0.04%) + -irqentry_enter (1,720,728 samples, 0.01%) - +__rcu_read_lock (6,542,321 samples, 0.01%) + -do_user_addr_fault (507,393,925 samples, 4.09%) -do_u.. +__this_cpu_preempt_check (11,209,459 samples, 0.02%) + -__mod_lruvec_page_state (36,119,056 samples, 0.29%) - +perf_event_mmap (7,917,062 samples, 0.02%) + -__mod_lruvec_state (12,010,640 samples, 0.10%) - +uncharge_folio (10,349,848 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (3,907,002 samples, 0.03%) - +folio_mapping (8,263,496 samples, 0.02%) + -__list_add_valid (2,580,518 samples, 0.02%) - +percpu_counter_add_batch (77,773,726 samples, 0.15%) + -access_error (5,162,816 samples, 0.04%) - +preempt_count_sub (7,777,329 samples, 0.02%) + -check_preemption_disabled (5,161,355 samples, 0.04%) - +set_pte (6,053,127 samples, 0.01%) + -__folio_alloc (131,556,371 samples, 1.06%) - +__folio_alloc (377,498,024 samples, 0.74%) + -qi_flush_piotlb (3,439,253 samples, 0.03%) - +__irqentry_text_end (7,399,701 samples, 0.01%) + -qi_submit_sync (20,575,186 samples, 0.17%) - +void fill_mt<unsigned long> (23,680,863,101 samples, 46.26%) +void fill_mt<unsigned long> -__GI_munmap (1,669,914 samples, 0.01%) - +__bitmap_intersects (12,103,165 samples, 0.02%) + -free_swap_cache (1,721,321 samples, 0.01%) - +std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> >::allocate (214,949,728 samples, 0.42%) + -hrtimer_interrupt (3,906,120 samples, 0.03%) - +__bitmap_intersects (30,281,058 samples, 0.06%) + -pfn_pte (2,578,095 samples, 0.02%) - +check_preemption_disabled (14,451,746 samples, 0.03%) + -__GI___mmap64 (4,355,769 samples, 0.04%) - +path_openat (4,455,340 samples, 0.01%) + -Vector_Loader<unsigned long, (192,782,902 samples, 1.55%) - +lru_gen_del_folio.constprop.0 (126,721,284 samples, 0.25%) + -get_mem_cgroup_from_mm (7,619,018 samples, 0.06%) - +__mem_cgroup_uncharge_list (12,937,530 samples, 0.03%) + -tick_sched_handle (3,929,255 samples, 0.03%) - +_start (37,160,744,652 samples, 72.59%) +_start -__mod_zone_page_state (7,742,755 samples, 0.06%) - +unmap_region (1,436,595,546 samples, 2.81%) +un.. -fpregs_assert_state_consistent (11,322,967 samples, 0.09%) - +exc_page_fault (2,032,313,547 samples, 3.97%) +exc_.. -__mod_memcg_lruvec_state (6,016,615 samples, 0.05%) - +__mem_cgroup_charge (9,713,896 samples, 0.02%) + -__count_memcg_events (13,697,832 samples, 0.11%) - +fpregs_assert_state_consistent (43,218,164 samples, 0.08%) + -__this_cpu_preempt_check (1,722,049 samples, 0.01%) - +__mod_lruvec_state (60,366,886 samples, 0.12%) + -free_unref_page_list (54,945,943 samples, 0.44%) - +__list_del_entry_valid (24,208,250 samples, 0.05%) + -_raw_spin_unlock (3,439,025 samples, 0.03%) - +__this_cpu_preempt_check (5,176,133 samples, 0.01%) + -__bitmap_intersects (5,665,820 samples, 0.05%) - +do_anonymous_page (7,558,781,189 samples, 14.76%) +do_anonymous_page -check_preemption_disabled (4,283,497 samples, 0.03%) - +__mod_node_page_state (31,910,261 samples, 0.06%) + -intel_invalidate_range (18,930,372 samples, 0.15%) - +preempt_count_add (7,776,035 samples, 0.02%) + -main (7,899,502,709 samples, 63.64%) -main +__bitmap_intersects (7,404,254 samples, 0.01%) + -sysvec_apic_timer_interrupt (3,906,120 samples, 0.03%) - +check_preemption_disabled (65,685,265 samples, 0.13%) + -__mod_zone_page_state (4,303,601 samples, 0.03%) - +asm_exc_page_fault (59,169,480 samples, 0.12%) + -check_preemption_disabled (22,342,390 samples, 0.18%) - +debug_smp_processor_id (764,412,561 samples, 1.49%) + -percpu_counter_add_batch (13,512,712 samples, 0.11%) - +do_syscall_64 (1,446,084,556 samples, 2.82%) +do.. -syscall (1,901,491 samples, 0.02%) - +folio_batch_move_lru (472,791,106 samples, 0.92%) + -__mod_zone_page_state (6,014,773 samples, 0.05%) - +tick_sched_timer (16,533,531 samples, 0.03%) + -__this_cpu_preempt_check (1,721,513 samples, 0.01%) - +_raw_spin_trylock (29,145,045 samples, 0.06%) + -free_pcppages_bulk (33,517,213 samples, 0.27%) - +folio_add_lru (297,426,407 samples, 0.58%) + -clone3 (12,026,411 samples, 0.10%) - +__rmqueue_pcplist (87,339,301 samples, 0.17%) + -check_preemption_disabled (6,571,340 samples, 0.05%) - +__folio_alloc (658,377,440 samples, 1.29%) + -__rcu_read_lock (3,440,942 samples, 0.03%) - +vma_alloc_folio (445,208,498 samples, 0.87%) + -__mem_cgroup_uncharge_list (4,302,382 samples, 0.03%) - +std::allocator_traits<std::allocator<dml::detail::ml::utils::structure_from<dml::detail::descriptor, dml::detail::completion_record> > >::allocate (214,949,728 samples, 0.42%) + -__pte_alloc (2,399,995 samples, 0.02%) - +scan_b (240,991,297 samples, 0.47%) + -__kmalloc_node (1,250,263 samples, 0.01%) - +cpuset_nodemask_valid_mems_allowed (12,736,238 samples, 0.02%) + -__mod_node_page_state (10,298,551 samples, 0.08%) - +__split_vma (5,175,874 samples, 0.01%) + -folio_mapping (1,886,363 samples, 0.02%) - +do_syscall_64 (12,937,992 samples, 0.03%) + -down_read_trylock (17,205,218 samples, 0.14%) - +lock_vma_under_rcu (177,202,272 samples, 0.35%) + -inc_mm_counter (2,955,852 samples, 0.02%) - +check_preemption_disabled (4,796,679 samples, 0.01%) + -__hrtimer_run_queues (23,218,296 samples, 0.19%) - +dsacache::Cache::SubmitTask (229,070,210 samples, 0.45%) + -get_page_from_freelist (1,702,838 samples, 0.01%) - +__mod_node_page_state (25,394,846 samples, 0.05%) + -pte_alloc_one (4,301,094 samples, 0.03%) - +entry_SYSCALL_64_after_hwframe (14,975,543 samples, 0.03%) + -__handle_mm_fault (448,904,806 samples, 3.62%) -__ha.. +debug_smp_processor_id (8,622,811 samples, 0.02%) + -tick_sched_handle (3,906,120 samples, 0.03%) - +handle_mm_fault (1,853,092,701 samples, 3.62%) +hand.. -__get_vma_policy (4,299,602 samples, 0.03%) - +do_syscall_64 (209,389,327 samples, 0.41%) + -do_mmap (4,355,769 samples, 0.04%) - +count_memcg_events.constprop.0 (132,239,867 samples, 0.26%) + -__mmu_notifier_invalidate_range (5,992,675 samples, 0.05%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (1,754,123,569 samples, 3.43%) +std.. -check_preemption_disabled (335,560,583 samples, 2.70%) -ch.. +down_read_trylock (69,146,736 samples, 0.14%) + -hrtimer_interrupt (3,160,021 samples, 0.03%) - +scan_a (5,219,196,992 samples, 10.19%) +scan_a -do_filp_open (3,516,552 samples, 0.03%) - +__mem_cgroup_charge (615,102,235 samples, 1.20%) + -debug_smp_processor_id (2,399,756 samples, 0.02%) - +check_preemption_disabled (19,022,724 samples, 0.04%) + -irqentry_exit_to_user_mode (27,513,049 samples, 0.22%) - +__rcu_read_unlock (6,050,212 samples, 0.01%) + -sync_regs (81,775,022 samples, 0.66%) - +__mod_zone_page_state (23,194,936 samples, 0.05%) + -tlb_finish_mmu (92,073,316 samples, 0.74%) - +preempt_count_add (8,645,056 samples, 0.02%) + -__handle_mm_fault (10,761,753 samples, 0.09%) - +cgroup_rstat_updated (7,782,352 samples, 0.02%) + -inherit_task_group.isra.0 (9,447,890 samples, 0.08%) - +intel_invalidate_range (18,972,896 samples, 0.04%) + -__mod_node_page_state (10,329,384 samples, 0.08%) - +preempt_count_add (7,910,132 samples, 0.02%) + -std::__atomic_base<std::vector<dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >, std::allocator<dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> > > >*>::exchange (1,378,883 samples, 0.01%) - +vma_alloc_folio (5,817,196 samples, 0.01%) + -__hrtimer_run_queues (3,906,120 samples, 0.03%) - +__x64_sys_munmap (1,442,633,722 samples, 2.82%) +__.. -mod_lruvec_page_state.constprop.0 (1,372,559 samples, 0.01%) - +try_charge_memcg (1,133,283,682 samples, 2.21%) +t.. -charge_memcg (1,473,067,142 samples, 11.87%) -charge_memcg +exit_to_user_mode_prepare (40,455,338 samples, 0.08%) + -__mod_memcg_lruvec_state (6,527,127 samples, 0.05%) - +__mod_node_page_state (51,742,237 samples, 0.10%) + -debug_smp_processor_id (1,373,840 samples, 0.01%) - +perf_event_init_task (9,487,420 samples, 0.02%) + -free_pages_and_swap_cache (1,721,321 samples, 0.01%) - +lock_vma_under_rcu (109,438,099 samples, 0.21%) + -native_set_pte (1,722,726 samples, 0.01%) - +_raw_spin_lock (63,953,100 samples, 0.12%) + -check_preemption_disabled (3,441,066 samples, 0.03%) - +__mod_memcg_lruvec_state (16,491,655 samples, 0.03%) + -vm_normal_page (6,012,101 samples, 0.05%) - +check_preemption_disabled (6,896,706 samples, 0.01%) + -clear_page_erms (31,784,051 samples, 0.26%) - +__alloc_pages (5,187,926 samples, 0.01%) + -inc_mm_counter (33,246,524 samples, 0.27%) - +do_madvise (5,811,133 samples, 0.01%) + -preempt_count_add (5,162,459 samples, 0.04%) - +__this_cpu_preempt_check (80,379,073 samples, 0.16%) + -update_process_times (2,551,340 samples, 0.02%) - +page_remove_rmap (244,036,722 samples, 0.48%) + -__x64_sys_get_mempolicy (1,901,491 samples, 0.02%) - +perf_iterate_sb.constprop.0 (5,036,520 samples, 0.01%) + -qi_flush_dev_iotlb_pasid (20,575,186 samples, 0.17%) - +main (5,186,627 samples, 0.01%) + -__hrtimer_run_queues (3,929,255 samples, 0.03%) - +pte_alloc_one (4,825,833 samples, 0.01%) + -asm_exc_page_fault (2,779,260,635 samples, 22.39%) -asm_exc_page_fault +check_preemption_disabled (6,479,558 samples, 0.01%) + -policy_nodemask (10,811,789 samples, 0.09%) - +release_pages (593,315,450 samples, 1.16%) + -debug_smp_processor_id (25,810,012 samples, 0.21%) - +tick_sched_timer (7,679,459 samples, 0.02%) + -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::_M_gen_rand (434,355,181 samples, 3.50%) -std.. +check_preemption_disabled (70,871,080 samples, 0.14%) + -up_read (14,627,484 samples, 0.12%) - +add_wq (8,667,141 samples, 0.02%) + -qi_flush_dev_iotlb_pasid (1,673,902 samples, 0.01%) - +try_charge_memcg (90,530,113 samples, 0.18%) + -check_preemption_disabled (1,372,344 samples, 0.01%) - +sysmalloc (211,429,950 samples, 0.41%) + -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (589,743,128 samples, 4.75%) -std::.. +operator new (214,949,728 samples, 0.42%) + -__this_cpu_preempt_check (4,110,574 samples, 0.03%) - +__mod_memcg_lruvec_state (18,971,182 samples, 0.04%) + -pmd_val (2,580,930 samples, 0.02%) - +do_vmi_align_munmap (1,442,633,722 samples, 2.82%) +do.. -check_preemption_disabled (9,751,946 samples, 0.08%) - +check_preemption_disabled (7,779,084 samples, 0.02%) + -qi_submit_sync (1,673,902 samples, 0.01%) - +vm_mmap_pgoff (14,975,543 samples, 0.03%) + -__mod_lruvec_state (10,326,666 samples, 0.08%) - +__libc_openat64 (4,893,850 samples, 0.01%) + -lru_add_fn (86,008,598 samples, 0.69%) - +__mod_lruvec_page_state (157,290,019 samples, 0.31%) + -__this_cpu_preempt_check (4,300,899 samples, 0.03%) - +check_preemption_disabled (61,772,249 samples, 0.12%) + -__memset_avx512_unaligned_erms (1,137,938,797 samples, 9.17%) -__memset_avx5.. +folio_lruvec_lock_irqsave (6,047,890 samples, 0.01%) + -perf_adjust_freq_unthr_context (3,906,120 samples, 0.03%) - +dml::core::hardware_device::submit (12,752,896 samples, 0.02%) + -__this_cpu_preempt_check (2,580,587 samples, 0.02%) - +qi_flush_dev_iotlb_pasid (5,175,356 samples, 0.01%) + -update_process_times (23,218,296 samples, 0.19%) - +qi_flush_piotlb (13,797,540 samples, 0.03%) + -_raw_spin_unlock (1,723,108 samples, 0.01%) - +__this_cpu_preempt_check (10,374,314 samples, 0.02%) + -inherit_event.isra.0 (8,588,453 samples, 0.07%) - +cgroup_rstat_updated (5,188,830 samples, 0.01%) + -__list_add_valid (1,543,716 samples, 0.01%) - +__this_cpu_preempt_check (9,482,915 samples, 0.02%) + -release_pages (148,503,966 samples, 1.20%) - +__mod_node_page_state (24,198,689 samples, 0.05%) + -kmem_cache_alloc (1,477,090 samples, 0.01%) - +__mod_memcg_lruvec_state (50,024,604 samples, 0.10%) + -check_preemption_disabled (7,742,661 samples, 0.06%) - +tlb_batch_pages_flush (303,488,983 samples, 0.59%) + -qi_submit_sync (17,208,829 samples, 0.14%) - +debug_smp_processor_id (6,022,653 samples, 0.01%) + -entry_SYSCALL_64_after_hwframe (48,297,259 samples, 0.39%) - +__pte_alloc (10,374,798 samples, 0.02%) + -percpu_counter_add_batch (2,955,852 samples, 0.02%) - +handle_mm_fault (8,017,729,013 samples, 15.66%) +handle_mm_fault -tlb_batch_pages_flush (151,081,301 samples, 1.22%) - +accfg_wq_get_first (8,667,141 samples, 0.02%) + -do_syscall_64 (4,355,769 samples, 0.04%) - +check_preemption_disabled (9,503,613 samples, 0.02%) + -qi_flush_dev_iotlb_pasid (1,721,543 samples, 0.01%) - +__sysvec_apic_timer_interrupt (7,679,459 samples, 0.02%) + -get_page_from_freelist (64,593,491 samples, 0.52%) - +page_counter_try_charge (82,117,644 samples, 0.16%) + -change_protection (47,225,879 samples, 0.38%) - +__mod_node_page_state (19,093,740 samples, 0.04%) + -__pte_alloc (4,301,094 samples, 0.03%) - +hrtimer_interrupt (16,533,531 samples, 0.03%) + -cgroup_rstat_updated (3,439,529 samples, 0.03%) - +__this_cpu_preempt_check (5,169,643 samples, 0.01%) + -handle_mm_fault (2,098,114,425 samples, 16.90%) -handle_mm_fault +irqentry_enter (6,046,216 samples, 0.01%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (2,590,885,395 samples, 20.87%) -unsigned long std::uniform_int_d.. +preempt_count_add (15,552,403 samples, 0.03%) + -__mmu_notifier_invalidate_range_end (3,907,002 samples, 0.03%) - +zap_page_range_single (5,811,133 samples, 0.01%) + -__mod_zone_page_state (3,599,706 samples, 0.03%) - +do_sys_openat2 (4,893,850 samples, 0.01%) + -_mm512_mask_add_epi64 (399,179,278 samples, 3.22%) -_mm.. +preempt_count_add (12,102,048 samples, 0.02%) + -tick_sched_timer (3,929,255 samples, 0.03%) - +__mod_lruvec_state (43,985,238 samples, 0.09%) + -check_preemption_disabled (3,443,426 samples, 0.03%) - +copy_process (12,076,968 samples, 0.02%) + -asm_exc_page_fault (14,716,433 samples, 0.12%) - +lru_add_fn (178,139,222 samples, 0.35%) + -__mod_node_page_state (10,126,910 samples, 0.08%) - +entry_SYSCALL_64_after_hwframe (10,248,023 samples, 0.02%) + -blk_cgroup_congested (7,448,744 samples, 0.06%) - +__this_cpu_preempt_check (6,052,056 samples, 0.01%) + -find_get_pmu_context (1,721,882 samples, 0.01%) - +do_user_addr_fault (8,304,698,141 samples, 16.22%) +do_user_addr_fault -__GI_mprotect (49,308,740 samples, 0.40%) - +__mod_node_page_state (55,310,667 samples, 0.11%) + -do_syscall_64 (2,757,577 samples, 0.02%) - +inherit_event.isra.0 (9,487,420 samples, 0.02%) + -folio_add_lru (66,382,666 samples, 0.53%) - +update_process_times (15,726,866 samples, 0.03%) + -check_preemption_disabled (3,441,399 samples, 0.03%) - +_raw_spin_lock (42,338,862 samples, 0.08%) + -__this_cpu_preempt_check (21,515,177 samples, 0.17%) - +lru_add_fn (356,992,904 samples, 0.70%) + -qi_flush_dev_iotlb_pasid (3,441,227 samples, 0.03%) - +qi_flush_piotlb (27,586,822 samples, 0.05%) + -memcg_check_events (65,813,626 samples, 0.53%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (10,546,449,255 samples, 20.60%) +unsigned long std::uniform_int_d.. -debug_smp_processor_id (192,848,336 samples, 1.55%) - +_mm512_stream_load_si512 (744,562,723 samples, 1.45%) + -folio_add_new_anon_rmap (50,759,573 samples, 0.41%) - +entry_SYSCALL_64_after_hwframe (5,811,133 samples, 0.01%) + -_int_free (1,501,128 samples, 0.01%) - +entry_SYSCALL_64_after_hwframe (209,389,327 samples, 0.41%) + -asm_sysvec_apic_timer_interrupt (3,976,134 samples, 0.03%) - +asm_sysvec_apic_timer_interrupt (7,679,459 samples, 0.02%) + -__free_one_page (26,642,645 samples, 0.21%) - +_mid_memalign (214,949,728 samples, 0.42%) + -entry_SYSCALL_64_after_hwframe (4,508,436 samples, 0.04%) - +preempt_count_add (15,311,608 samples, 0.03%) + -perf_event_mmap (3,091,521 samples, 0.02%) - +__count_memcg_events (106,303,142 samples, 0.21%) + -internal_get_user_pages_fast (1,360,933 samples, 0.01%) - +preempt_count_add (7,049,342 samples, 0.01%) + -folio_add_lru (148,811,875 samples, 1.20%) - +preempt_count_add (6,543,516 samples, 0.01%) + -qi_submit_sync (3,441,227 samples, 0.03%) - +__mod_zone_page_state (5,186,354 samples, 0.01%) + -unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (1,708,906,008 samples, 13.77%) -unsigned int std::un.. +qi_submit_sync (27,600,815 samples, 0.05%) + -perf_iterate_sb.constprop.0 (3,091,521 samples, 0.02%) - +dml::detail::ml::task<std::allocator<unsigned char> >::task (214,949,728 samples, 0.42%) + -folio_lruvec_lock_irqsave (3,440,417 samples, 0.03%) - +hrtimer_interrupt (7,679,459 samples, 0.02%) + -intel_invalidate_range (47,225,879 samples, 0.38%) - +check_preemption_disabled (23,283,575 samples, 0.05%) + -release_pages (4,466,800 samples, 0.04%) - +accfg_get_param_long (6,285,189 samples, 0.01%) + -scan_a (954,595,693 samples, 7.69%) -scan_a +__this_cpu_preempt_check (38,903,410 samples, 0.08%) + -__mmu_notifier_invalidate_range (10,325,016 samples, 0.08%) - +pmd_val (6,047,707 samples, 0.01%) + -__alloc_pages (88,274,583 samples, 0.71%) - +preempt_count_add (13,829,087 samples, 0.03%) + -asm_sysvec_apic_timer_interrupt (4,549,673 samples, 0.04%) - +tick_sched_handle (15,726,866 samples, 0.03%) + -release_pages (10,327,799 samples, 0.08%) - +uncharge_folio (18,971,713 samples, 0.04%) + -__this_cpu_preempt_check (2,580,155 samples, 0.02%) - +do_syscall_64 (10,248,023 samples, 0.02%) + -__list_add_valid (4,301,025 samples, 0.03%) - +__list_add_valid (11,210,452 samples, 0.02%) + -__GI___close (3,254,804 samples, 0.03%) - +__list_del_entry_valid (12,069,913 samples, 0.02%) + -__fput (1,255,741 samples, 0.01%) - +__GI_munmap (1,446,084,556 samples, 2.82%) +__.. -__sysvec_apic_timer_interrupt (3,906,120 samples, 0.03%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (2,429,077,277 samples, 4.74%) +std::.. -policy_nodemask (6,019,230 samples, 0.05%) - +check_preemption_disabled (19,287,090 samples, 0.04%) + -__slab_alloc.isra.0 (2,566,038 samples, 0.02%) - +__mmu_notifier_invalidate_range_end (69,838,165 samples, 0.14%) + -count_memcg_events.constprop.0 (18,611,478 samples, 0.15%) - +do_user_addr_fault (2,022,843,060 samples, 3.95%) +do_u.. -update_process_times (3,929,255 samples, 0.03%) - +__mod_lruvec_state (22,422,367 samples, 0.04%) + -free_unref_page_list (27,537,505 samples, 0.22%) - +__rcu_read_unlock (6,045,661 samples, 0.01%) + -allocate_slab (1,702,838 samples, 0.01%) - +__list_del_entry_valid (7,758,378 samples, 0.02%) + -__GI_munmap (364,334,473 samples, 2.94%) -__.. +__mod_memcg_lruvec_state (38,908,400 samples, 0.08%) + -perf_iterate_ctx (3,091,521 samples, 0.02%) - +do_vmi_munmap (1,442,633,722 samples, 2.82%) +do.. -__rcu_read_unlock (1,373,709 samples, 0.01%) - +__mod_zone_page_state (16,384,138 samples, 0.03%) + -free_swap_cache (2,577,335 samples, 0.02%) - +folio_mapping (7,779,902 samples, 0.02%) + -unsigned long std::uniform_int_distribution<unsigned long>::operator (589,743,128 samples, 4.75%) -unsig.. +__mod_lruvec_state (30,559,640 samples, 0.06%) + -seq_read_iter (1,723,340 samples, 0.01%) - +perf_event_task_tick (7,679,459 samples, 0.02%) + -check_preemption_disabled (2,745,100 samples, 0.02%) - +mas_walk (43,382,631 samples, 0.08%) + -asm_exc_page_fault (1,023,075,453 samples, 8.24%) -asm_exc_pag.. +numa_node_of_cpu (7,901,696 samples, 0.02%) + -mtree_range_walk (4,617,093 samples, 0.04%) - +__mod_node_page_state (15,523,973 samples, 0.03%) + -preempt_count_add (1,714,558 samples, 0.01%) - +free_unref_page_list (110,356,941 samples, 0.22%) + -get_page_from_freelist (116,065,568 samples, 0.94%) - +tlb_batch_pages_flush (605,386,925 samples, 1.18%) + -cpuset_nodemask_valid_mems_allowed (1,716,142 samples, 0.01%) - +openat (4,893,850 samples, 0.01%) + -unmap_vmas (271,400,381 samples, 2.19%) -u.. +__mmu_notifier_invalidate_range (55,187,637 samples, 0.11%) + -__mod_lruvec_state (12,049,236 samples, 0.10%) - +error_entry (10,160,085 samples, 0.02%) + -folio_add_new_anon_rmap (30,891,685 samples, 0.25%) - +down_read_trylock (50,243,965 samples, 0.10%) + -ksys_read (2,189,844 samples, 0.02%) - +access_error (11,239,905 samples, 0.02%) + -entry_SYSCALL_64_after_hwframe (1,669,914 samples, 0.01%) - +dsacache::Cache::Access (21,598,768 samples, 0.04%) + -numa_bitmask_clearall (1,278,530 samples, 0.01%) - +_int_malloc (214,949,728 samples, 0.42%) + -numactl (1,276,891 samples, 0.01%) - +dsacache::Cache::Access (240,484,366 samples, 0.47%) + -check_preemption_disabled (2,581,236 samples, 0.02%) - +_raw_spin_lock (7,702,324 samples, 0.02%) + -__list_del_entry_valid (6,020,356 samples, 0.05%) - +__x64_sys_mprotect (209,389,327 samples, 0.41%) + -tick_sched_handle (2,551,340 samples, 0.02%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (10,504,101,100 samples, 20.52%) +unsigned long std::uniform_int_d.. -__mmu_notifier_invalidate_range_end (18,930,372 samples, 0.15%) - +scheduler_tick (7,679,459 samples, 0.02%) + -preempt_count_add (5,146,855 samples, 0.04%) - +qi_submit_sync (5,175,356 samples, 0.01%) + -__mod_lruvec_state (5,167,317 samples, 0.04%) - +__GI_mprotect (209,389,327 samples, 0.41%) + -__folio_throttle_swaprate (7,961,416 samples, 0.06%) - +start_thread (11,571,705,609 samples, 22.60%) +start_thread -policy_node (2,583,208 samples, 0.02%) - +check_preemption_disabled (8,062,472 samples, 0.02%) + -preempt_count_sub (2,583,444 samples, 0.02%) - +check_preemption_disabled (12,102,815 samples, 0.02%) + -std::atomic<std::vector<dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> >, std::allocator<dml::handler<dml::mem_copy_operation, std::allocator<unsigned char> > > >*>::exchange (1,378,883 samples, 0.01%) - +__GI_madvise (5,811,133 samples, 0.01%) + -Aggregation<unsigned long, Sum, (1,362,543,537 samples, 10.98%) -Aggregation<unsi.. +kernel_get_mempolicy (7,453,000 samples, 0.01%) + -do_sys_openat2 (4,508,436 samples, 0.04%) - +kernel_clone (12,937,992 samples, 0.03%) + -__mod_lruvec_state (14,596,859 samples, 0.12%) - +dml::handler<dml::mem_copy_operation, dml::execution_interface<dml::hardware, std::allocator<unsigned char> >::allocator_type> dml::submit<dml::hardware, dml::execution_interface<dml::hardware, std::allocator<unsigned char> > > (228,473,295 samples, 0.45%) + -__x64_sys_mprotect (48,297,259 samples, 0.39%) - +do_syscall_64 (5,811,133 samples, 0.01%) + -__x64_sys_madvise (3,907,002 samples, 0.03%) - +[libstdc++.so.6.0.32] (11,565,045,107 samples, 22.59%) +[libstdc++.so.6.0.32] -mmap_region (4,355,769 samples, 0.04%) - +free_pcppages_bulk (71,564,161 samples, 0.14%) + -dsacache::CacheData::WaitOnCompletion (1,378,883 samples, 0.01%) - +void fill_mt<unsigned long> (5,186,627 samples, 0.01%) + -preempt_count_add (4,295,901 samples, 0.03%) - +percpu_counter_add_batch (50,994,413 samples, 0.10%) + -[libstdc++.so.6.0.32] (2,323,402,749 samples, 18.72%) -[libstdc++.so.6.0.32] +free_unref_page_prepare (33,631,349 samples, 0.07%) + -__mod_lruvec_page_state (25,400,890 samples, 0.20%) - +inherit_task_group.isra.0 (9,487,420 samples, 0.02%) + -dsacache::Cache::GetFromCache (1,484,047 samples, 0.01%) - +get_page_from_freelist (558,968,342 samples, 1.09%) + -kernel_clone (11,165,941 samples, 0.09%) - +__mod_zone_page_state (26,737,013 samples, 0.05%) + -__GI___mmap64 (4,355,769 samples, 0.04%) - +access_error (5,680,591 samples, 0.01%) + -check_preemption_disabled (3,439,703 samples, 0.03%) - +pfn_pte (6,051,666 samples, 0.01%) + -pud_val (1,720,480 samples, 0.01%) - +dml::core::dispatcher::hw_dispatcher::get_instance (11,868,621 samples, 0.02%) + -__list_del_entry_valid (5,153,473 samples, 0.04%) - +std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (3,983,113,625 samples, 7.78%) +std::merse.. -__mod_lruvec_state (3,606,180 samples, 0.03%) - +_int_memalign (214,949,728 samples, 0.42%) + -pmd_pfn (3,447,149 samples, 0.03%) - +unmap_vmas (1,075,330,413 samples, 2.10%) +u.. -cgroup_rstat_updated (63,678,308 samples, 0.51%) - +perf_iterate_ctx (5,036,520 samples, 0.01%) + -check_preemption_disabled (44,065,290 samples, 0.36%) - +page_counter_try_charge (13,089,432 samples, 0.03%) + -check_preemption_disabled (3,438,404 samples, 0.03%) - +check_preemption_disabled (220,193,615 samples, 0.43%) + -__this_cpu_preempt_check (1,887,177 samples, 0.02%) - +cgroup_rstat_updated (26,962,924 samples, 0.05%) + -__rcu_read_lock (3,441,202 samples, 0.03%) - +pgd_none (7,778,092 samples, 0.02%) + -kernel_get_mempolicy (1,901,491 samples, 0.02%) - +qi_submit_sync (110,258,373 samples, 0.22%) + -up_read (5,486,366 samples, 0.04%) - +cpuset_nodemask_valid_mems_allowed (16,423,197 samples, 0.03%) + -folio_add_new_anon_rmap (1,379,590 samples, 0.01%) - +qi_flush_piotlb (42,245,564 samples, 0.08%) + -pmd_page_vaddr (1,539,140 samples, 0.01%) - +tlb_finish_mmu (358,676,620 samples, 0.70%) + -Sum<unsigned long>::simd_agg (399,179,278 samples, 3.22%) -Sum.. +__vm_munmap (1,442,633,722 samples, 2.82%) +__.. -__mod_node_page_state (7,745,320 samples, 0.06%) - +__rcu_read_unlock (6,913,905 samples, 0.01%) + -__GI___libc_read (2,696,024 samples, 0.02%) - +__count_memcg_events (1,916,291,637 samples, 3.74%) +__co.. -memcg_check_events (578,196,059 samples, 4.66%) -memcg.. +__mod_zone_page_state (10,350,661 samples, 0.02%) + -__hrtimer_run_queues (3,160,021 samples, 0.03%) - +pgd_none (4,826,207 samples, 0.01%) + -__handle_mm_fault (2,030,172,314 samples, 16.36%) -__handle_mm_fault +mas_walk (77,801,377 samples, 0.15%) + -page_counter_try_charge (4,613,174 samples, 0.04%) - +qi_flush_piotlb (110,258,373 samples, 0.22%) + -preempt_count_add (1,883,612 samples, 0.02%) - +__mod_memcg_lruvec_state (58,635,941 samples, 0.11%) + -free_unref_page_commit (6,008,985 samples, 0.05%) - +clone3 (11,584,643,601 samples, 22.63%) +clone3 -__this_cpu_preempt_check (1,543,675 samples, 0.01%) - +__this_cpu_preempt_check (8,635,064 samples, 0.02%) + -__count_memcg_events (45,948,391 samples, 0.37%) - +__mem_cgroup_charge (5,673,300,607 samples, 11.08%) +__mem_cgroup_cha.. -__mod_lruvec_page_state (1,379,590 samples, 0.01%) - +__sysfs_device_parse (8,667,141 samples, 0.02%) + -exc_page_fault (2,189,316,833 samples, 17.64%) -exc_page_fault +__list_del_entry_valid (6,034,767 samples, 0.01%) + -do_syscall_64 (3,907,002 samples, 0.03%) - +policy_node (7,657,304 samples, 0.01%) + -pte_alloc_one (2,399,995 samples, 0.02%) - +syscall (4,439,475 samples, 0.01%) + -exc_page_fault (514,774,344 samples, 4.15%) -exc_.. +__libc_start_call_main (37,160,744,652 samples, 72.59%) +__libc_start_call_main -std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>::operator (976,265,274 samples, 7.87%) -std::mersen.. +exc_page_fault (38,214,263 samples, 0.07%) + -check_preemption_disabled (3,435,288 samples, 0.03%) - +dsacache::Cache::SubmitTask (12,752,896 samples, 0.02%) + -debug_smp_processor_id (1,721,080 samples, 0.01%) - +check_preemption_disabled (19,837,844 samples, 0.04%) + -mem_cgroup_charge_statistics (531,724,014 samples, 4.28%) -mem_c.. +__libc_start_main_impl (37,160,744,652 samples, 72.59%) +__libc_start_main_impl -perf_adjust_freq_unthr_context (23,218,296 samples, 0.19%) - +check_preemption_disabled (17,246,084 samples, 0.03%) + -exit_to_user_mode_prepare (26,653,249 samples, 0.21%) - +unsigned int std::uniform_int_distribution<unsigned long>::_S_nd<unsigned long, std::mersenne_twister_engine<unsigned long, 32ul, 624ul, 397ul, 31ul, 2567483615ul, 11ul, 4294967295ul, 7ul, 2636928640ul, 15ul, 4022730752ul, 18ul, 1812433253ul>, unsigned int> (2,429,077,277 samples, 4.74%) +unsig.. -sum_check (1,944,773,531 samples, 15.67%) -sum_check +__mod_zone_page_state (31,906,268 samples, 0.06%) + -__folio_throttle_swaprate (12,048,127 samples, 0.10%) - +sysvec_apic_timer_interrupt (16,782,425 samples, 0.03%) + -check_preemption_disabled (11,180,470 samples, 0.09%) - +__this_cpu_preempt_check (8,636,695 samples, 0.02%) + -perf_event_task_tick (3,906,120 samples, 0.03%) - +unsigned long std::uniform_int_distribution<unsigned long>::operator (2,429,077,277 samples, 4.74%) +unsig.. -__rcu_read_lock (3,091,903 samples, 0.02%) - +check_preemption_disabled (19,834,031 samples, 0.04%) + -scheduler_tick (3,929,255 samples, 0.03%) - +___slab_alloc (5,173,381 samples, 0.01%) + -charge_memcg (1,545,503 samples, 0.01%) - +check_preemption_disabled (44,059,020 samples, 0.09%) + -perf_adjust_freq_unthr_context (3,929,255 samples, 0.03%) - +get_page_from_freelist (282,901,670 samples, 0.55%) + -__mod_lruvec_state (9,448,209 samples, 0.08%) - +check_preemption_disabled (8,782,853 samples, 0.02%) + -mprotect_fixup (48,297,256 samples, 0.39%) - +__list_del_entry_valid (5,174,842 samples, 0.01%) + -error_entry (2,585,387 samples, 0.02%) - +scheduler_tick (14,864,641 samples, 0.03%) + -qi_submit_sync (2,233,100 samples, 0.02%) - +pud_val (6,882,155 samples, 0.01%) + -_raw_spin_unlock (3,087,886 samples, 0.02%) - +asm_exc_page_fault (10,602,062,007 samples, 20.71%) +asm_exc_page_fault -dsacache::Cache::Access (1,484,047 samples, 0.01%) - +do_syscall_64 (14,975,543 samples, 0.03%) + -count_memcg_events.constprop.0 (37,832,812 samples, 0.30%) - +free_unref_page_commit (12,938,068 samples, 0.03%) + -sysvec_apic_timer_interrupt (4,549,673 samples, 0.04%) - +mem_cgroup_charge_statistics (2,082,259,880 samples, 4.07%) +mem_.. -perf_event_alloc (6,866,571 samples, 0.06%) - +__mmu_notifier_invalidate_range_end (5,811,133 samples, 0.01%) + -folio_add_lru_vma (1,886,950 samples, 0.02%) - +do_anonymous_page (1,641,693,315 samples, 3.21%) +do_.. -syscall_exit_to_user_mode (2,249,086 samples, 0.02%) - +__memset_avx512_unaligned_erms (4,505,100,311 samples, 8.80%) +__memset_avx.. -vm_mmap_pgoff (4,355,769 samples, 0.04%) - +folio_batch_move_lru (237,550,002 samples, 0.46%) + -__rcu_read_lock (6,021,245 samples, 0.05%) - +qi_submit_sync (27,586,822 samples, 0.05%) + -__this_cpu_preempt_check (3,442,490 samples, 0.03%) - +sync_regs (7,409,456 samples, 0.01%) + -check_preemption_disabled (8,408,263 samples, 0.07%) - +__list_add_valid (8,625,309 samples, 0.02%) + -do_madvise (3,907,002 samples, 0.03%) - +clear_page_erms (156,238,867 samples, 0.31%) + -check_preemption_disabled (1,721,567 samples, 0.01%) - +__count_memcg_events (180,985,616 samples, 0.35%) + -qi_flush_piotlb (6,883,789 samples, 0.06%) - +error_entry (38,027,670 samples, 0.07%) + -__GI___libc_read (2,972,361 samples, 0.02%) - +blk_cgroup_congested (31,984,146 samples, 0.06%) + -__folio_alloc (88,785,026 samples, 0.72%) - +error_entry (28,522,680 samples, 0.06%) + -__mod_zone_page_state (6,882,437 samples, 0.06%) - +irqentry_enter (5,165,357 samples, 0.01%) + -preempt_count_add (2,949,755 samples, 0.02%) - +perf_event_alloc (9,487,420 samples, 0.02%) + -__GI_madvise (3,907,002 samples, 0.03%) - +up_read (30,706,665 samples, 0.06%) +