diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp
index 8a626dc..82ce9c2 100644
--- a/offloading-cacher/cache.hpp
+++ b/offloading-cacher/cache.hpp
@@ -352,7 +352,6 @@ inline std::unique_ptr<dsacache::CacheData> dsacache::Cache::Access(uint8_t* dat
         // threads data cache structure
 
         if (!state.second) {
-            std::cout << "[!] Found another cache instance for 0x" << std::hex << (uint64_t)task->GetSource() << std::dec << std::endl;
             return std::move(std::make_unique<CacheData>(state.first->second));
         }
 
@@ -379,8 +378,6 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node)
     numa_node_size64(node, &free_space);
 
     if (free_space < size) {
-        std::cout << "[!] Memory shortage when allocating " << size << "B on node " << node << std::endl;
-
         // dst node lacks memory space so we flush the cache for this
         // node hoping to free enough currently unused entries to make
         // the second allocation attempt successful
@@ -392,8 +389,6 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node)
         numa_node_size64(node, &free_space);
 
         if (free_space < size) {
-            std::cout << "[x] Memory shortage after flush when allocating " << size << "B on node " << node << std::endl;
-
             return nullptr;
         }
     }
@@ -401,8 +396,6 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node)
     uint8_t* dst = reinterpret_cast<uint8_t*>(numa_alloc_onnode(size, node));
 
     if (dst == nullptr) {
-        std::cout << "[x] Allocation try failed for " << size << "B on node " << node << std::endl;
-
         return nullptr;
     }
 
@@ -413,7 +406,6 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con
     uint8_t* dst = AllocOnNode(task->GetSize(), dst_node);
 
     if (dst == nullptr) {
-        std::cout << "[x] Allocation failed so we can not cache" << std::endl;
         return;
     }
 
@@ -699,8 +691,6 @@ inline void dsacache::CacheData::WaitOnCompletion(const bool weak) {
         auto result = handler.get();
 
         if (result.status != dml::status_code::ok) {
-            std::cerr << "[x] Encountered bad status code for operation: " << dml::StatusCodeToString(result.status) << std::endl;
-
             // if one of the copy tasks failed we abort the whole task
             // after all operations are completed on it
             error = true;
diff --git a/offloading-cacher/main.cpp b/offloading-cacher/main.cpp
index 8193f5a..df09f5b 100644
--- a/offloading-cacher/main.cpp
+++ b/offloading-cacher/main.cpp
@@ -100,6 +100,8 @@ std::unique_ptr<dsacache::CacheData> PerformAccessAndTest(uint8_t* src, const si
             size * sizeof(uint8_t)
     );
 
+    data_cache->WaitOnCompletion(dsacache::WAIT_WEAK);
+
     uint8_t* cached_imm = reinterpret_cast<uint8_t *>(data_cache->GetDataLocation());
 
     // check the value immediately just to see if ram or cache was returned
diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp
index 1de00ba..481ec70 100644
--- a/qdp_project/src/Benchmark.cpp
+++ b/qdp_project/src/Benchmark.cpp
@@ -30,11 +30,11 @@ constexpr uint32_t WARMUP_ITERATION_COUNT = 5;
 constexpr uint32_t ITERATION_COUNT = 5;
 
 #ifdef MODE_PREFETCH
-constexpr uint32_t GROUP_COUNT = 16;
-constexpr size_t CHUNK_SIZE_B = WL_SIZE_B / GROUP_COUNT;
-constexpr uint32_t TC_SCANA = 2;
+constexpr uint32_t GROUP_COUNT = 8; // NOTE(review): presumably the number of thread groups - confirm against usage
+constexpr size_t CHUNK_SIZE_B = 16_MiB; // fixed size, independent of WL_SIZE_B and GROUP_COUNT; NOTE(review): GROUP_COUNT * CHUNK_SIZE_B need not equal WL_SIZE_B - confirm users of CHUNK_SIZE_B handle that
+constexpr uint32_t TC_SCANA = 4; // NOTE(review): presumably the thread count of the scan-A stage - confirm
 constexpr uint32_t TC_SCANB = 1;
-constexpr uint32_t TC_AGGRJ = 8;
+constexpr uint32_t TC_AGGRJ = 4; // NOTE(review): presumably the thread count of the aggregate/join stage - confirm
 constexpr bool PERFORM_CACHING = true;
 constexpr bool YIELD_ON_CACHE_MISS = false;
 constexpr bool DATA_IN_HBM = false;