Browse Source

add option to force mapping of pages by touching each one with a write at its beginning; required because behaviour somehow changed — the cache was experiencing page-fault errors, and handling them via DSA is simply too slow

master
Constantin Fürst 11 months ago
parent
commit
99552b3de4
  1. 23
      offloading-cacher/cache.hpp
  2. 2
      qdp_project/src/Benchmark.cpp
  3. 8
      qdp_project/src/Configuration.hpp

23
offloading-cacher/cache.hpp

@ -52,6 +52,7 @@ namespace dsacache {
constexpr uint64_t FLAG_WAIT_WEAK = 0b1ULL << 63; constexpr uint64_t FLAG_WAIT_WEAK = 0b1ULL << 63;
constexpr uint64_t FLAG_HANDLE_PF = 0b1ULL << 62; constexpr uint64_t FLAG_HANDLE_PF = 0b1ULL << 62;
constexpr uint64_t FLAG_ACCESS_WEAK = 0b1ULL << 61; constexpr uint64_t FLAG_ACCESS_WEAK = 0b1ULL << 61;
constexpr uint64_t FLAG_FORCE_MAP_PAGES = 0b1ULL << 60;
constexpr uint64_t FLAG_DEFAULT = 0ULL; constexpr uint64_t FLAG_DEFAULT = 0ULL;
class Cache; class Cache;
@ -392,7 +393,7 @@ inline std::unique_ptr<dsacache::CacheData> dsacache::Cache::Access(uint8_t* dat
// data source location // data source location
if (CheckFlag(flags, FLAG_ACCESS_WEAK)) { if (CheckFlag(flags, FLAG_ACCESS_WEAK)) {
std::cerr << "WEAK ACCESS FAILED!" << std::endl;
std::cerr << "[x] WEAK ACCESS FAILED!" << std::endl;
task->SetCacheToSource(); task->SetCacheToSource();
return std::move(task); return std::move(task);
} }
@ -466,6 +467,14 @@ inline uint8_t* dsacache::Cache::AllocOnNode(const size_t size, const int node)
return nullptr; return nullptr;
} }
if (CheckFlag(flags_, FLAG_TRIGGER_PAGES)) {
static const size_t page_size_b = getpagesize();
for (size_t i = 0; i < size; i += page_size_b) {
dst[i] = 0;
}
}
return dst; return dst;
} }
@ -513,15 +522,15 @@ inline dml::handler<dml::mem_copy_operation, std::allocator<uint8_t>> dsacache::
dml::data_view dstv = dml::make_view(dst, size); dml::data_view dstv = dml::make_view(dst, size);
if (CheckFlag(flags_, FLAG_HANDLE_PF)) { if (CheckFlag(flags_, FLAG_HANDLE_PF)) {
return dml::submit<dml::hardware>(
return dml::submit<dml::software>(
dml::mem_copy.block_on_fault(), srcv, dstv, dml::mem_copy.block_on_fault(), srcv, dstv,
dml::execution_interface<dml::hardware,std::allocator<uint8_t>>(), node
dml::execution_interface<dml::software,std::allocator<uint8_t>>(), node
); );
} }
else { else {
return dml::submit<dml::hardware>(
return dml::submit<dml::software>(
dml::mem_copy, srcv, dstv, dml::mem_copy, srcv, dstv,
dml::execution_interface<dml::hardware,std::allocator<uint8_t>>(), node
dml::execution_interface<dml::software,std::allocator<uint8_t>>(), node
); );
} }
} }
@ -769,7 +778,7 @@ inline void dsacache::CacheData::WaitOnCompletion() {
for (auto& handler : *local_handlers) { for (auto& handler : *local_handlers) {
if (CheckFlag(flags_, FLAG_WAIT_WEAK) && !handler.is_finished()) { if (CheckFlag(flags_, FLAG_WAIT_WEAK) && !handler.is_finished()) {
std::cerr << "WEAK WAIT FAIL!" << std::endl;
std::cerr << "[x] WEAK WAIT FAIL!" << std::endl;
handlers_->store(local_handlers); handlers_->store(local_handlers);
return; return;
} }
@ -779,7 +788,7 @@ inline void dsacache::CacheData::WaitOnCompletion() {
if (result.status != dml::status_code::ok) { if (result.status != dml::status_code::ok) {
// if one of the copy tasks failed we abort the whole task // if one of the copy tasks failed we abort the whole task
// after all operations are completed on it // after all operations are completed on it
std::cerr << "ERROR (" << dml::StatusCodeToString(result.status) << ") FOUND FOR TASK IN WAIT!" << std::endl;
std::cerr << "[x] ERROR (" << dml::StatusCodeToString(result.status) << ") FOUND FOR TASK IN WAIT!" << std::endl;
error = true; error = true;
} }
} }

2
qdp_project/src/Benchmark.cpp

@ -101,7 +101,6 @@ void scan_a(size_t gid, size_t tid) {
uint16_t* mask_ptr = get_mask<TC_SCANA>(MASK_A_, chunk_index, tid); uint16_t* mask_ptr = get_mask<TC_SCANA>(MASK_A_, chunk_index, tid);
filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA); filter::apply_same(mask_ptr, nullptr, chunk_ptr, CMP_A, CHUNK_SIZE_B / TC_SCANA);
} }
THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now(); THREAD_TIMING_[SCANA_TIMING_INDEX][UniqueIndex(gid,tid)][0][TIME_STAMP_WAIT] = std::chrono::steady_clock::now();
@ -195,6 +194,7 @@ int main() {
// which is configured for xeonmax with smart assignment // which is configured for xeonmax with smart assignment
uint64_t cache_flags = 0; uint64_t cache_flags = 0;
cache_flags |= dsacache::FLAG_WAIT_WEAK; cache_flags |= dsacache::FLAG_WAIT_WEAK;
cache_flags |= dsacache::FLAG_FORCE_MAP_PAGES;
CACHE_.SetFlags(cache_flags); CACHE_.SetFlags(cache_flags);
CACHE_.Init(CachePlacementPolicy, CopyMethodPolicy); CACHE_.Init(CachePlacementPolicy, CopyMethodPolicy);
} }

8
qdp_project/src/Configuration.hpp

@ -13,14 +13,14 @@ constexpr int MEM_NODE_HBM = 8;
constexpr int MEM_NODE_DRAM = 0; constexpr int MEM_NODE_DRAM = 0;
#ifdef MODE_PREFETCH #ifdef MODE_PREFETCH
constexpr uint32_t GROUP_COUNT = 16;
constexpr size_t CHUNK_SIZE_B = 16_MiB;
constexpr uint32_t TC_SCANA = 2;
constexpr uint32_t GROUP_COUNT = 8;
constexpr size_t CHUNK_SIZE_B = 8_MiB;
constexpr uint32_t TC_SCANA = 4;
constexpr uint32_t TC_SCANB = 1; constexpr uint32_t TC_SCANB = 1;
constexpr uint32_t TC_AGGRJ = 1; constexpr uint32_t TC_AGGRJ = 1;
constexpr bool PERFORM_CACHING = true; constexpr bool PERFORM_CACHING = true;
constexpr int MEM_NODE_A = 0; constexpr int MEM_NODE_A = 0;
constexpr int MEM_NODE_B = 1;
constexpr int MEM_NODE_B = 0;
constexpr char MODE_STRING[] = "prefetch"; constexpr char MODE_STRING[] = "prefetch";
#endif #endif
#ifdef MODE_DRAM #ifdef MODE_DRAM

Loading…
Cancel
Save