diff --git a/offloading-cacher/cache.hpp b/offloading-cacher/cache.hpp index 8a626dc..82ce9c2 100644 --- a/offloading-cacher/cache.hpp +++ b/offloading-cacher/cache.hpp @@ -352,7 +352,6 @@ inline std::unique_ptr dsacache::Cache::Access(uint8_t* dat // threads data cache structure if (!state.second) { - std::cout << "[!] Found another cache instance for 0x" << std::hex << (uint64_t)task->GetSource() << std::dec << std::endl; return std::move(std::make_unique(state.first->second)); } @@ -379,8 +378,6 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node) numa_node_size64(node, &free_space); if (free_space < size) { - std::cout << "[!] Memory shortage when allocating " << size << "B on node " << node << std::endl; - // dst node lacks memory space so we flush the cache for this // node hoping to free enough currently unused entries to make // the second allocation attempt successful @@ -392,8 +389,6 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node) numa_node_size64(node, &free_space); if (free_space < size) { - std::cout << "[x] Memory shortage after flush when allocating " << size << "B on node " << node << std::endl; - return nullptr; } } @@ -401,8 +396,6 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node) uint8_t* dst = reinterpret_cast(numa_alloc_onnode(size, node)); if (dst == nullptr) { - std::cout << "[x] Allocation try failed for " << size << "B on node " << node << std::endl; - return nullptr; } @@ -413,7 +406,6 @@ inline void dsacache::Cache::SubmitTask(CacheData* task, const int dst_node, con uint8_t* dst = AllocOnNode(task->GetSize(), dst_node); if (dst == nullptr) { - std::cout << "[x] Allocation failed so we can not cache" << std::endl; return; } @@ -699,8 +691,6 @@ inline void dsacache::CacheData::WaitOnCompletion(const bool weak) { auto result = handler.get(); if (result.status != dml::status_code::ok) { - std::cerr << "[x] Encountered bad status code for operation: " << dml::StatusCodeToString(result.status) << std::endl; - // if one of the copy tasks failed we abort the whole task // after all operations are completed on it error = true; diff --git a/offloading-cacher/main.cpp b/offloading-cacher/main.cpp index 8193f5a..df09f5b 100644 --- a/offloading-cacher/main.cpp +++ b/offloading-cacher/main.cpp @@ -100,6 +100,8 @@ std::unique_ptr PerformAccessAndTest(uint8_t* src, const si size * sizeof(uint8_t) ); + data_cache->WaitOnCompletion(dsacache::WAIT_WEAK); + uint8_t* cached_imm = reinterpret_cast(data_cache->GetDataLocation()); // check the value immediately just to see if ram or cache was returned diff --git a/qdp_project/src/Benchmark.cpp b/qdp_project/src/Benchmark.cpp index 1de00ba..481ec70 100644 --- a/qdp_project/src/Benchmark.cpp +++ b/qdp_project/src/Benchmark.cpp @@ -30,11 +30,11 @@ constexpr uint32_t WARMUP_ITERATION_COUNT = 5; constexpr uint32_t ITERATION_COUNT = 5; #ifdef MODE_PREFETCH -constexpr uint32_t GROUP_COUNT = 16; -constexpr size_t CHUNK_SIZE_B = WL_SIZE_B / GROUP_COUNT; -constexpr uint32_t TC_SCANA = 2; +constexpr uint32_t GROUP_COUNT = 8; +constexpr size_t CHUNK_SIZE_B = 16_MiB; +constexpr uint32_t TC_SCANA = 4; constexpr uint32_t TC_SCANB = 1; -constexpr uint32_t TC_AGGRJ = 8; +constexpr uint32_t TC_AGGRJ = 4; constexpr bool PERFORM_CACHING = true; constexpr bool YIELD_ON_CACHE_MISS = false; constexpr bool DATA_IN_HBM = false;