From 53d2b33aee9a70efce0c1067b3d243106b7a86c1 Mon Sep 17 00:00:00 2001 From: David Wu Date: Sun, 17 Dec 2023 12:37:05 -0500 Subject: [PATCH] Cleanup testgpuerror implementation and coverage of most other nn outputs --- CONTRIBUTORS | 1 + cpp/command/contribute.cpp | 4 +- cpp/command/gputest.cpp | 17 +- cpp/tests/testnnevalcanary.cpp | 478 +++++++++++++++++++++------------ cpp/tests/tests.h | 16 +- 5 files changed, 326 insertions(+), 190 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index bd71ae7b0..d9a5fa6cd 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -44,6 +44,7 @@ Sebastian H ("nerai") - Minor code cleanup Jochen Voss ("seehuhn") - Typo fix in doc "kinfkong" - Added trt build configuration option "TTXS123OK" - Minor code style improvement. +Chin-Chang Yang - For a very useful GPU backend error testing command. Separately from the authors of the content in this repo, additional special thanks to: Junyan Xu ("alreadydone") - for much testing and troubleshooting for Windows support diff --git a/cpp/command/contribute.cpp b/cpp/command/contribute.cpp index ef6c79549..b3e84c299 100644 --- a/cpp/command/contribute.cpp +++ b/cpp/command/contribute.cpp @@ -918,8 +918,8 @@ int MainCmds::contribute(const vector& args) { // Cap test to avoid spawning too many threads when many selfplay games are running const int maxBatchSizeCap = std::min(4, 1 + nnEval->getMaxBatchSize()/2); bool fp32BatchSuccessBuf = true; - string baseFileName = ""; - bool success = Tests::runFP16Test(nnEval,nnEval32,logger,boardSizeTest,maxBatchSizeCap,verbose,quickTest,fp32BatchSuccessBuf, baseFileName); + string referenceFileName = ""; + bool success = Tests::runBackendErrorTest(nnEval,nnEval32,logger,boardSizeTest,maxBatchSizeCap,verbose,quickTest,fp32BatchSuccessBuf,referenceFileName); if(!fp32BatchSuccessBuf) { logger.write("Error: large GPU numerical errors, unable to continue"); shouldStop.store(true); diff --git a/cpp/command/gputest.cpp b/cpp/command/gputest.cpp index c7084da57..aa3af69d3 100644 --- a/cpp/command/gputest.cpp +++ b/cpp/command/gputest.cpp @@ -26,7 +26,7 @@ int MainCmds::testgpuerror(const vector& args) { string modelFile; int boardSize; bool quickTest; - string baseFileName; + string referenceFileName; try { KataGoCommandLine cmd("Test GPU error between FP16 and FP32 with and without batching"); cmd.addConfigFileArg(KataGoCommandLine::defaultGtpConfigFileName(),"gtp_example.cfg"); @@ -35,8 +35,8 @@ int MainCmds::testgpuerror(const vector& args) { TCLAP::SwitchArg quickArg("","quick","Faster shorter test"); cmd.add(boardSizeArg); cmd.add(quickArg); - TCLAP::ValueArg baseFileArg("", "basefile", "Base file to be generated by Eigen backend; loaded by other backends for cross-backend check", false, "", "FILE"); - cmd.add(baseFileArg); + TCLAP::ValueArg referenceFileArg("", "reference-file", "Reference file to be generated by Eigen backend; loaded by other backends for cross-backend check, if not specified then will use the backend's own FP32 as reference", false, "", "FILE"); + cmd.add(referenceFileArg); cmd.setShortUsageArgLimit(); cmd.addOverrideConfigArg(); @@ -46,7 +46,7 @@ int MainCmds::testgpuerror(const vector& args) { modelFile = cmd.getModelFile(); boardSize = boardSizeArg.getValue(); quickTest = quickArg.getValue(); - baseFileName = baseFileArg.getValue(); + referenceFileName = referenceFileArg.getValue(); cmd.getConfig(cfg); if(boardSize != 19 && boardSize != 13 && boardSize != 9) @@ -78,12 +78,7 @@ int MainCmds::testgpuerror(const vector& args) { logger.write("For batch test, using default batch size 16"); } const int maxConcurrentEvals = maxBatchSize * 2 + 16; - int expectedConcurrentEvals = maxBatchSize * 2 + 16; - -#ifdef USE_EIGEN_BACKEND - if(expectedConcurrentEvals > 2) - expectedConcurrentEvals = 2; -#endif + const int expectedConcurrentEvals = maxBatchSize; const bool defaultRequireExactNNLen = false; @@ -116,7 +111,7 @@ int MainCmds::testgpuerror(const vector& args) { const int maxBatchSizeCap = -1; const bool verbose = true; bool fp32BatchSuccessBuf = true; - bool success = Tests::runFP16Test(nnEval,nnEval32,logger,boardSize,maxBatchSizeCap,verbose,quickTest,fp32BatchSuccessBuf, baseFileName); + bool success = Tests::runBackendErrorTest(nnEval,nnEval32,logger,boardSize,maxBatchSizeCap,verbose,quickTest,fp32BatchSuccessBuf,referenceFileName); (void)success; // cout << success << endl; diff --git a/cpp/tests/testnnevalcanary.cpp b/cpp/tests/testnnevalcanary.cpp index d99aa6e76..b8da016ba 100644 --- a/cpp/tests/testnnevalcanary.cpp +++ b/cpp/tests/testnnevalcanary.cpp @@ -1,11 +1,15 @@ #include "../tests/tests.h" +#include "../core/fileutils.h" #include "../neuralnet/nneval.h" #include "../dataio/sgf.h" +#include "../external/nlohmann_json/json.hpp" + //------------------------ #include "../core/using.h" //------------------------ +using json = nlohmann::json; void Tests::runCanaryTests(NNEvaluator* nnEval, int symmetry, bool print) { { @@ -171,12 +175,27 @@ void Tests::runCanaryTests(NNEvaluator* nnEval, int symmetry, bool print) { struct GpuErrorStats { std::vector winrateError; - std::vector scoreError; + std::vector leadError; + std::vector scoreMeanError; + std::vector scoreStdevError; std::vector topPolicyDiff; std::vector policyKLDiv; + std::vector shorttermWinlossErrorError; + std::vector shorttermScoreErrorError; + std::vector ownershipError; void appendStats(const std::shared_ptr& base, const std::shared_ptr& other) { - winrateError.push_back(std::abs(0.5*(base->whiteWinProb - base->whiteLossProb) - 0.5*(other->whiteWinProb - other->whiteLossProb))); - scoreError.push_back(std::abs(base->whiteLead - other->whiteLead)); + winrateError.push_back( + std::abs(0.5*(base->whiteWinProb - base->whiteLossProb) - 0.5*(other->whiteWinProb - other->whiteLossProb)) + + std::abs(base->whiteNoResultProb - other->whiteNoResultProb) + ); + leadError.push_back(std::abs(base->whiteLead - other->whiteLead)); + scoreMeanError.push_back(std::abs(base->whiteScoreMean - other->whiteScoreMean)); + scoreStdevError.push_back( + std::abs( + sqrt(std::max(0.0, (double)base->whiteScoreMeanSq - base->whiteScoreMean*base->whiteScoreMean)) - + sqrt(std::max(0.0, (double)other->whiteScoreMeanSq - other->whiteScoreMean*other->whiteScoreMean)) + ) + ); int topPolicyIdx = 0; double topPolicyProb = -1; @@ -195,7 +214,24 @@ struct GpuErrorStats { } } policyKLDiv.push_back(klDivSum); - }; + + //A metric indicating the "typical" error in the winloss value or the score that the net expects, relative to the + //short-term future MCTS value. + + shorttermWinlossErrorError.push_back(std::abs(base->shorttermWinlossError - other->shorttermWinlossError)); + shorttermScoreErrorError.push_back(std::abs(base->shorttermScoreError - other->shorttermScoreError)); + + testAssert(base->whiteOwnerMap != NULL); + testAssert(other->whiteOwnerMap != NULL); + testAssert(base->nnXLen == other->nnXLen); + testAssert(base->nnYLen == other->nnYLen); + for(int y = 0; ynnYLen; y++) { + for(int x = 0; xnnXLen; x++) { + int pos = NNPos::xyToPos(x,y,base->nnXLen); + ownershipError.push_back(std::abs(base->whiteOwnerMap[pos] - other->whiteOwnerMap[pos])); + } + } + } double getAverage(std::vector& vec) { double sum = 0; @@ -215,140 +251,207 @@ struct GpuErrorStats { return sortedVec[sortedVec.size()-1]; } - bool checkStats99(double wr, double score, double tpd, double pkld) { + void sortErrors() { std::sort(winrateError.begin(),winrateError.end()); - std::sort(scoreError.begin(),scoreError.end()); + std::sort(leadError.begin(),leadError.end()); + std::sort(scoreMeanError.begin(),scoreMeanError.end()); + std::sort(scoreStdevError.begin(),scoreStdevError.end()); std::sort(topPolicyDiff.begin(),topPolicyDiff.end()); std::sort(policyKLDiv.begin(),policyKLDiv.end()); + std::sort(shorttermWinlossErrorError.begin(),shorttermWinlossErrorError.end()); + std::sort(shorttermScoreErrorError.begin(),shorttermScoreErrorError.end()); + std::sort(ownershipError.begin(),ownershipError.end()); + } + + bool checkStats99(double wr, double score, double tpd, double pkld) { + sortErrors(); return ( 100*get99Percentile(winrateError) <= wr && - get99Percentile(scoreError) <= score && + get99Percentile(leadError) <= score && + get99Percentile(scoreMeanError) <= score && + get99Percentile(scoreStdevError) <= score*0.6 && 100*get99Percentile(topPolicyDiff) <= tpd && - get99Percentile(policyKLDiv) <= pkld + get99Percentile(policyKLDiv) <= pkld && + 100*get99Percentile(shorttermWinlossErrorError) <= wr*1.8 && + get99Percentile(shorttermScoreErrorError) <= score*0.75 && + 100*get99Percentile(ownershipError) <= wr*1.75 ); } bool checkStatsMax(double wr, double score, double tpd, double pkld) { - std::sort(winrateError.begin(),winrateError.end()); - std::sort(scoreError.begin(),scoreError.end()); - std::sort(topPolicyDiff.begin(),topPolicyDiff.end()); - std::sort(policyKLDiv.begin(),policyKLDiv.end()); + sortErrors(); return ( 100*getMaxPercentile(winrateError) <= wr && - getMaxPercentile(scoreError) <= score && + getMaxPercentile(leadError) <= score && + getMaxPercentile(scoreMeanError) <= score && + getMaxPercentile(scoreStdevError) <= score*0.6 && 100*getMaxPercentile(topPolicyDiff) <= tpd && - getMaxPercentile(policyKLDiv) <= pkld + getMaxPercentile(policyKLDiv) <= pkld && + 100*getMaxPercentile(shorttermWinlossErrorError) <= wr*1.8 && + getMaxPercentile(shorttermScoreErrorError) <= score*0.75 && + 100*getMaxPercentile(ownershipError) <= wr*4.0 // more lenient since ownership maxes over more stuff ); } void reportStats(const string& name, Logger& logger) { - std::sort(winrateError.begin(),winrateError.end()); - std::sort(scoreError.begin(),scoreError.end()); - std::sort(topPolicyDiff.begin(),topPolicyDiff.end()); - std::sort(policyKLDiv.begin(),policyKLDiv.end()); + sortErrors(); + auto rpad = [](const string& s, int n) { + if(s.size() < n) + return s + std::string(n - s.size(),' '); + return s; + }; logger.write( - name + " winrateError: " + + rpad(name + " winrateError: ", 60) + Global::strprintf( - "%7.5f%% %7.5f%% %7.5f%% %7.5f%%", + " %7.5f%% %7.5f%% %7.5f%% %7.5f%%", 100*getAverage(winrateError), 100*get90Percentile(winrateError), 100*get99Percentile(winrateError), 100*getMaxPercentile(winrateError) ) ); logger.write( - name + " scoreError: " + + rpad(name + " leadError: ", 60) + + Global::strprintf( + " %7.5f %7.5f %7.5f %7.5f", + getAverage(leadError), get90Percentile(leadError), get99Percentile(leadError), getMaxPercentile(leadError)) + ); + logger.write( + rpad(name + " scoreMeanError: ", 60) + Global::strprintf( - " %7.5f %7.5f %7.5f %7.5f", - getAverage(scoreError), get90Percentile(scoreError), get99Percentile(scoreError), getMaxPercentile(scoreError)) + " %7.5f %7.5f %7.5f %7.5f", + getAverage(scoreMeanError), get90Percentile(scoreMeanError), get99Percentile(scoreMeanError), getMaxPercentile(scoreMeanError)) ); logger.write( - name + " topPolicyDelta: " + + rpad(name + " scoreStdevError:", 60) + Global::strprintf( - "%7.5f%% %7.5f%% %7.5f%% %7.5f%%", + " %7.5f %7.5f %7.5f %7.5f", + getAverage(scoreStdevError), get90Percentile(scoreStdevError), get99Percentile(scoreStdevError), getMaxPercentile(scoreStdevError)) + ); + logger.write( + rpad(name + " topPolicyDelta: ", 60) + + Global::strprintf( + " %7.5f%% %7.5f%% %7.5f%% %7.5f%%", 100*getAverage(topPolicyDiff), 100*get90Percentile(topPolicyDiff), 100*get99Percentile(topPolicyDiff), 100*getMaxPercentile(topPolicyDiff)) ); logger.write( - name + " policyKLDiv: " + + rpad(name + " policyKLDiv: ", 60) + Global::strprintf( - "%8.6f %8.6f %8.6f %8.6f", + " %8.6f %8.6f %8.6f %8.6f", getAverage(policyKLDiv), get90Percentile(policyKLDiv), get99Percentile(policyKLDiv), getMaxPercentile(policyKLDiv)) ); + logger.write( + rpad(name + " stWLErrorError:", 60) + + Global::strprintf( + " %7.5fc %7.5fc %7.5fc %7.5fc", + 100*getAverage(shorttermWinlossErrorError), 100*get90Percentile(shorttermWinlossErrorError), 100*get99Percentile(shorttermWinlossErrorError), 100*getMaxPercentile(shorttermWinlossErrorError)) + ); + logger.write( + rpad(name + " stScErrorError:", 60) + + Global::strprintf( + " %7.5f %7.5f %7.5f %7.5f", + getAverage(shorttermScoreErrorError), get90Percentile(shorttermScoreErrorError), get99Percentile(shorttermScoreErrorError), getMaxPercentile(shorttermScoreErrorError)) + ); + logger.write( + rpad(name + " ownershipError:", 60) + + Global::strprintf( + " %7.5fc %7.5fc %7.5fc %7.5fc", + 100*getAverage(ownershipError), 100*get90Percentile(ownershipError), 100*get99Percentile(ownershipError), 100*getMaxPercentile(ownershipError)) + ); } }; -void saveBaseToFile(const std::vector>& base, const string& baseFileName, Logger& logger, bool verbose) { - assert(baseFileName != ""); - std::ofstream outFile(baseFileName, std::ios::binary); - - if (!outFile) - throw StringError("Unable to save base to: " + baseFileName); - - size_t size = base.size(); - outFile.write(reinterpret_cast(&size), sizeof(size)); - - for (const auto& nnOutputPtr : base) { - if (nnOutputPtr) { - outFile.write(reinterpret_cast(&nnOutputPtr->nnHash), sizeof(nnOutputPtr->nnHash)); - outFile.write(reinterpret_cast(&nnOutputPtr->whiteWinProb), sizeof(nnOutputPtr->whiteWinProb)); - outFile.write(reinterpret_cast(&nnOutputPtr->whiteLossProb), sizeof(nnOutputPtr->whiteLossProb)); - outFile.write(reinterpret_cast(&nnOutputPtr->whiteNoResultProb), sizeof(nnOutputPtr->whiteNoResultProb)); - outFile.write(reinterpret_cast(&nnOutputPtr->whiteScoreMean), sizeof(nnOutputPtr->whiteScoreMean)); - outFile.write(reinterpret_cast(&nnOutputPtr->whiteScoreMeanSq), sizeof(nnOutputPtr->whiteScoreMeanSq)); - outFile.write(reinterpret_cast(&nnOutputPtr->whiteLead), sizeof(nnOutputPtr->whiteLead)); - outFile.write(reinterpret_cast(&nnOutputPtr->varTimeLeft), sizeof(nnOutputPtr->varTimeLeft)); - outFile.write(reinterpret_cast(&nnOutputPtr->shorttermWinlossError), sizeof(nnOutputPtr->shorttermWinlossError)); - outFile.write(reinterpret_cast(&nnOutputPtr->shorttermScoreError), sizeof(nnOutputPtr->shorttermScoreError)); - outFile.write(reinterpret_cast(nnOutputPtr->policyProbs), sizeof(float) * NNPos::MAX_NN_POLICY_SIZE); - outFile.write(reinterpret_cast(&nnOutputPtr->nnXLen), sizeof(nnOutputPtr->nnXLen)); - outFile.write(reinterpret_cast(&nnOutputPtr->nnYLen), sizeof(nnOutputPtr->nnYLen)); - } - } - - if (verbose) - logger.write("Saved " + Global::uint64ToString((uint64_t)base.size()) + " positions to: " + baseFileName); +static std::string nnOutputToJson(const std::shared_ptr& nnOutput) { + json ret; + ret["nnHash"] = nnOutput->nnHash.toString(); + ret["whiteWinProb"] = nnOutput->whiteWinProb; + ret["whiteLossProb"] = nnOutput->whiteLossProb; + ret["whiteNoResultProb"] = nnOutput->whiteNoResultProb; + ret["whiteScoreMean"] = nnOutput->whiteScoreMean; + ret["whiteScoreMeanSq"] = nnOutput->whiteScoreMeanSq; + ret["whiteLead"] = nnOutput->whiteLead; + ret["varTimeLeft"] = nnOutput->varTimeLeft; + ret["shorttermWinlossError"] = nnOutput->shorttermWinlossError; + ret["shorttermScoreError"] = nnOutput->shorttermScoreError; + ret["policyProbs"] = std::vector(&(nnOutput->policyProbs[0]), &(nnOutput->policyProbs[0]) + NNPos::MAX_NN_POLICY_SIZE); + ret["policyOptimismUsed"] = nnOutput->policyOptimismUsed; + ret["nnXLen"] = nnOutput->nnXLen; + ret["nnYLen"] = nnOutput->nnYLen; + testAssert(nnOutput->whiteOwnerMap != NULL); + ret["whiteOwnerMap"] = std::vector(nnOutput->whiteOwnerMap, nnOutput->whiteOwnerMap + nnOutput->nnXLen*nnOutput->nnYLen); + return std::string(ret.dump()); +} - outFile.close(); +static std::shared_ptr nnOutputOfJson(const std::string& s) { + std::shared_ptr nnOutput = std::make_shared(); + json input = json::parse(s); + nnOutput->nnHash = Hash128::ofString(input["nnHash"].get()); + nnOutput->whiteWinProb = input["whiteWinProb"].get(); + nnOutput->whiteLossProb = input["whiteLossProb"].get(); + nnOutput->whiteNoResultProb = input["whiteNoResultProb"].get(); + nnOutput->whiteScoreMean = input["whiteScoreMean"].get(); + nnOutput->whiteScoreMeanSq = input["whiteScoreMeanSq"].get(); + nnOutput->whiteLead = input["whiteLead"].get(); + nnOutput->varTimeLeft = input["varTimeLeft"].get(); + nnOutput->shorttermWinlossError = input["shorttermWinlossError"].get(); + nnOutput->shorttermScoreError = input["shorttermScoreError"].get(); + std::vector policyProbs = input["policyProbs"].get>(); + testAssert(policyProbs.size() == NNPos::MAX_NN_POLICY_SIZE); + std::copy(policyProbs.begin(),policyProbs.end(),nnOutput->policyProbs); + nnOutput->policyOptimismUsed = input["policyOptimismUsed"].get(); + nnOutput->nnXLen = input["nnXLen"].get(); + nnOutput->nnYLen = input["nnYLen"].get(); + testAssert(nnOutput->nnXLen >= 2 && nnOutput->nnXLen <= NNPos::MAX_BOARD_LEN); + testAssert(nnOutput->nnYLen >= 2 && nnOutput->nnYLen <= NNPos::MAX_BOARD_LEN); + std::vector whiteOwnerMap = input["whiteOwnerMap"].get>(); + testAssert(whiteOwnerMap.size() == nnOutput->nnXLen*nnOutput->nnYLen); + nnOutput->whiteOwnerMap = new float[nnOutput->nnXLen*nnOutput->nnYLen]; + std::copy(whiteOwnerMap.begin(),whiteOwnerMap.end(),nnOutput->whiteOwnerMap); + nnOutput->noisedPolicyProbs = nullptr; + return nnOutput; } -void loadBaseFromFile(std::vector>& base, const string& baseFileName, Logger& logger, bool verbose) { - assert(baseFileName != ""); - std::ifstream inFile(baseFileName, std::ios::binary); - - if (!inFile) - throw StringError("Unable to load: " + baseFileName); - - size_t size; - inFile.read(reinterpret_cast(&size), sizeof(size)); - base.resize(size); - - for (size_t i = 0; i < size; ++i) { - base[i] = std::make_shared(); - - inFile.read(reinterpret_cast(&base[i]->nnHash), sizeof(base[i]->nnHash)); - inFile.read(reinterpret_cast(&base[i]->whiteWinProb), sizeof(base[i]->whiteWinProb)); - inFile.read(reinterpret_cast(&base[i]->whiteLossProb), sizeof(base[i]->whiteLossProb)); - inFile.read(reinterpret_cast(&base[i]->whiteNoResultProb), sizeof(base[i]->whiteNoResultProb)); - inFile.read(reinterpret_cast(&base[i]->whiteScoreMean), sizeof(base[i]->whiteScoreMean)); - inFile.read(reinterpret_cast(&base[i]->whiteScoreMeanSq), sizeof(base[i]->whiteScoreMeanSq)); - inFile.read(reinterpret_cast(&base[i]->whiteLead), sizeof(base[i]->whiteLead)); - inFile.read(reinterpret_cast(&base[i]->varTimeLeft), sizeof(base[i]->varTimeLeft)); - inFile.read(reinterpret_cast(&base[i]->shorttermWinlossError), sizeof(base[i]->shorttermWinlossError)); - inFile.read(reinterpret_cast(&base[i]->shorttermScoreError), sizeof(base[i]->shorttermScoreError)); - inFile.read(reinterpret_cast(&base[i]->policyProbs), sizeof(float) * NNPos::MAX_NN_POLICY_SIZE); - inFile.read(reinterpret_cast(&base[i]->nnXLen), sizeof(base[i]->nnXLen)); - inFile.read(reinterpret_cast(&base[i]->nnYLen), sizeof(base[i]->nnYLen)); - - base[i]->whiteOwnerMap = nullptr; - base[i]->noisedPolicyProbs = nullptr; +static void saveReferenceValuesToFile(const std::vector>& referenceValues, const string& referenceFileName, Logger& logger, bool verbose) { + testAssert(referenceFileName != ""); + std::ofstream outFile; + FileUtils::open(outFile,referenceFileName); + if(!outFile) + throw StringError("Unable to save reference values to: " + referenceFileName); + + for(const std::shared_ptr& nnOutput : referenceValues) { + testAssert(nnOutput != nullptr); + outFile << nnOutputToJson(nnOutput) << "\n"; } + if(verbose) + logger.write("Saved reference values for " + Global::uint64ToString((uint64_t)referenceValues.size()) + " positions to: " + referenceFileName); - if (verbose) - logger.write("Loaded " + Global::uint64ToString((uint64_t)base.size()) + " positions from: " + baseFileName); + outFile.close(); +} + +static void loadReferenceValuesFromFile(std::vector>& referenceValues, const string& referenceFileName, Logger& logger, bool verbose) { + testAssert(referenceFileName != ""); + referenceValues.clear(); + std::vector lines = FileUtils::readFileLines(referenceFileName,'\n'); - inFile.close(); + for(const string& line: lines) { + if(Global::trim(line) != "") { + referenceValues.push_back(nnOutputOfJson(line)); + } + } + if(verbose) + logger.write("Loaded reference values for " + Global::uint64ToString((uint64_t)referenceValues.size()) + " positions from: " + referenceFileName); } -bool Tests::runFP16Test(NNEvaluator* nnEval, NNEvaluator* nnEval32, Logger& logger, int boardSize, int maxBatchSizeCap, bool verbose, bool quickTest, bool& fp32BatchSuccessBuf, const string& baseFileName) { +bool Tests::runBackendErrorTest( + NNEvaluator* nnEval, + NNEvaluator* nnEval32, + Logger& logger, + int boardSize, + int maxBatchSizeCap, + bool verbose, + bool quickTest, + bool& fp32BatchSuccessBuf, + const string& referenceFileName +) { int maxBatchSize = nnEval->getMaxBatchSize(); if(maxBatchSize != nnEval32->getMaxBatchSize()) @@ -358,11 +461,6 @@ bool Tests::runFP16Test(NNEvaluator* nnEval, NNEvaluator* nnEval32, Logger& logg if(maxBatchSize <= 0) throw StringError("Invalid max batch size for fp16 test"); -#ifdef USE_EIGEN_BACKEND - if (baseFileName == "") - return true; -#endif - Rand filterRand("Tests::runFP16Test filter rand"); auto loadHists = [&](const std::vector& sgfStrs) { std::vector hists; @@ -411,108 +509,136 @@ bool Tests::runFP16Test(NNEvaluator* nnEval, NNEvaluator* nnEval32, Logger& logg return buf.result; }; - { - if(verbose) - logger.write("Running evaluations in fp32"); - std::vector> base; - - bool loadedBaseFromFile = false; - + std::vector> referenceValues; + bool loadedReferenceValuesFromFile = false; #ifndef USE_EIGEN_BACKEND - if (baseFileName != "") { - loadBaseFromFile(base, baseFileName, logger, verbose); - loadedBaseFromFile = true; - } + if(referenceFileName != "") { + loadReferenceValuesFromFile(referenceValues, referenceFileName, logger, verbose); + loadedReferenceValuesFromFile = true; + } #endif + (void)loadReferenceValuesFromFile; - if (!loadedBaseFromFile) - for(const BoardHistory& hist: hists) - base.push_back(evalBoard(nnEval32,hist)); + std::vector> fp32; + std::vector> fp32Batched(hists.size()); + std::vector> current; + std::vector> currentBatched(hists.size()); -#ifdef USE_EIGEN_BACKEND - assert(baseFileName != ""); - saveBaseToFile(base, baseFileName, logger, verbose); -#endif + if(verbose) + logger.write("Beginning evaluations! These may take a long time on pure CPU, or on a weak GPU, but on a decent GPU shouldn't take too long."); + + if(verbose) + logger.write("Running evaluations in fp32"); + for(const BoardHistory& hist: hists) + fp32.push_back(evalBoard(nnEval32,hist)); - std::vector> batched(hists.size()); - std::vector> current; - std::vector> cbatched(hists.size()); + Rand rand; + + if(maxBatchSize <= 1) + fp32Batched = fp32; + else { + if(verbose) + logger.write("Running batched evaluations in fp32"); + auto runThread = [&](int threadIdx) { + for(size_t i = threadIdx; i permutation(maxBatchSize); + rand.fillShuffledUIntRange(maxBatchSize, permutation.data()); + vector threads; + for(int i = 0; i permutation(maxBatchSize); + rand.fillShuffledUIntRange(maxBatchSize, permutation.data()); vector threads; for(int i = 0; i threads; - for(int i = 0; i>& candidateValues, GpuErrorStats& stats) { + for(size_t i = 0; i