diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp
old mode 100644
new mode 100755
index 220ef1c..17ac67f
--- a/offloading-cacher/cache.hpp
+++ b/offloading-cacher/cache.hpp
@@ -52,6 +52,7 @@ namespace dsacache {
     constexpr uint64_t FLAG_WAIT_WEAK = 0b1ULL << 63;
     constexpr uint64_t FLAG_HANDLE_PF = 0b1ULL << 62;
     constexpr uint64_t FLAG_ACCESS_WEAK = 0b1ULL << 61;
+    constexpr uint64_t FLAG_FORCE_MAP_PAGES = 0b1ULL << 60;
     constexpr uint64_t FLAG_DEFAULT = 0ULL;
 
     class Cache;
@@ -392,7 +393,7 @@ inline std::unique_ptr dsacache::Cache::Access(uint8_t* dat
     // data source location
 
     if (CheckFlag(flags, FLAG_ACCESS_WEAK)) {
-        std::cerr << "WEAK ACCESS FAILED!" << std::endl;
+        std::cerr << "[x] WEAK ACCESS FAILED!" << std::endl;
         task->SetCacheToSource();
         return std::move(task);
     }
@@ -466,6 +467,14 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node)
         return nullptr;
     }
 
+    if (CheckFlag(flags_, FLAG_FORCE_MAP_PAGES)) {
+        static const size_t page_size_b = getpagesize();
+
+        for (size_t i = 0; i < size; i += page_size_b) {
+            dst[i] = 0;
+        }
+    }
+
     return dst;
 }
 
@@ -513,15 +522,15 @@ inline dml::handler> dsacache::
     dml::data_view dstv = dml::make_view(dst, size);
 
     if (CheckFlag(flags_, FLAG_HANDLE_PF)) {
-        return dml::submit(
+        return dml::submit(
             dml::mem_copy.block_on_fault(), srcv, dstv,
-            dml::execution_interface>(), node
+            dml::execution_interface>(), node
         );
     }
     else {
-        return dml::submit(
+        return dml::submit(
             dml::mem_copy, srcv, dstv,
-            dml::execution_interface>(), node
+            dml::execution_interface>(), node
         );
     }
 }
@@ -769,7 +778,7 @@ inline void dsacache::CacheData::WaitOnCompletion() {
 
     for (auto& handler : *local_handlers) {
         if (CheckFlag(flags_, FLAG_WAIT_WEAK) && !handler.is_finished()) {
-            std::cerr << "WEAK WAIT FAIL!" << std::endl;
+            std::cerr << "[x] WEAK WAIT FAIL!" << std::endl;
             handlers_->store(local_handlers);
             return;
         }
@@ -779,7 +788,7 @@ inline void dsacache::CacheData::WaitOnCompletion() {
         if (result.status != dml::status_code::ok) {
             // if one of the copy tasks failed we abort the whole task
             // after all operations are completed on it
-            std::cerr << "ERROR (" << dml::StatusCodeToString(result.status) << ") FOUND FOR TASK IN WAIT!" << std::endl;
+            std::cerr << "[x] ERROR (" << dml::StatusCodeToString(result.status) << ") FOUND FOR TASK IN WAIT!" << std::endl;
             error = true;
         }
     }
diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp
index 9d7ede2..e192a79 100644
--- a/qdp_project/src/Benchmark.cpp
+++ b/qdp_project/src/Benchmark.cpp
@@ -101,7 +101,6 @@ void scan_a(size_t gid, size_t tid) {
         uint16_t* mask_ptr = get_mask(MASK_A_, chunk_index, tid);
 
         filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA);
-
     }
 
     THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
@@ -195,6 +194,7 @@ int main() {
         // which is configured for xeonmax with smart assignment
         uint64_t cache_flags = 0;
         cache_flags |= dsacache::FLAG_WAIT_WEAK;
+        cache_flags |= dsacache::FLAG_FORCE_MAP_PAGES;
         CACHE_.SetFlags(cache_flags);
         CACHE_.Init(CachePlacementPolicy, CopyMethodPolicy);
     }
diff --git a/qdp_project/src/Configuration.hpp b/qdp_project/src/Configuration.hpp
index b8fcdb9..60327a1 100644
--- a/qdp_project/src/Configuration.hpp
+++ b/qdp_project/src/Configuration.hpp
@@ -13,14 +13,14 @@ constexpr int MEM_NODE_HBM = 8;
 constexpr int MEM_NODE_DRAM = 0;
 
 #ifdef MODE_PREFETCH
-constexpr uint32_t GROUP_COUNT = 16;
-constexpr size_t CHUNK_SIZE_B = 16_MiB;
-constexpr uint32_t TC_SCANA = 2;
+constexpr uint32_t GROUP_COUNT = 8;
+constexpr size_t CHUNK_SIZE_B = 8_MiB;
+constexpr uint32_t TC_SCANA = 4;
 constexpr uint32_t TC_SCANB = 1;
 constexpr uint32_t TC_AGGRJ = 1;
 constexpr bool PERFORM_CACHING = true;
 constexpr int MEM_NODE_A = 0;
-constexpr int MEM_NODE_B = 1;
+constexpr int MEM_NODE_B = 0;
 constexpr char MODE_STRING[] = "prefetch";
 #endif
 #ifdef MODE_DRAM