From ec2009afcb18997f3754fcb0ca8b904a2178be65 Mon Sep 17 00:00:00 2001 From: Adam Pritchard Date: Fri, 18 Oct 2019 10:57:32 -0400 Subject: [PATCH 1/2] Improve datastore write robustness This should help with some of the data corruption errors we've been seeing. --- datastore.cpp | 128 ++++++++++++++++++++++++++++++++++++--------- datastore.hpp | 3 -- datastore_test.cpp | 28 ++-------- utils.cpp | 37 +++++++++++++ utils.hpp | 3 ++ 5 files changed, 147 insertions(+), 52 deletions(-) create mode 100644 utils.cpp diff --git a/datastore.cpp b/datastore.cpp index ac83899..93284fd 100644 --- a/datastore.cpp +++ b/datastore.cpp @@ -19,7 +19,9 @@ #include #include +#include #include "datastore.hpp" +#include "utils.hpp" #include "vendor/nlohmann/json.hpp" using json = nlohmann::json; @@ -28,21 +30,22 @@ using namespace std; using namespace psicash; using namespace error; +static string FilePath(const string& file_root, const string& suffix); +static Result FileLoad(const string& file_path); +static Error FileStore(bool paused, const string& file_path, const json& json); Datastore::Datastore() : initialized_(false), json_(json::object()), paused_(false) { } -static string FilePath(const string& file_root, const string& suffix) { - return file_root + "/psicashdatastore" + suffix; -} - Error Datastore::Init(const string& file_root, const string& suffix) { SYNCHRONIZE(mutex_); file_path_ = FilePath(file_root, suffix); - if (auto err = FileLoad(file_path_)) { - return PassError(err); + auto res = FileLoad(file_path_); + if (!res) { + return PassError(res.error()); } + json_ = *res; initialized_ = true; return error::nullerr; } @@ -51,8 +54,13 @@ Error Datastore::Init(const string& file_root, const string& suffix) { Error Datastore::Clear(const string& file_path) { SYNCHRONIZE(mutex_); - json_ = json::object(); - return PassError(FileStore(file_path)); + paused_ = false; + auto empty_json = json::object(); + if (auto err = FileStore(paused_, file_path, empty_json)) { + return PassError(err); + } + json_ = empty_json; + return error::nullerr; } Error Datastore::Clear(const string& file_root, const string& suffix) { @@ -77,20 +85,53 @@ Error Datastore::UnpauseWrites() { return nullerr; } paused_ = false; - return FileStore(file_path_); + return PassError(FileStore(paused_, file_path_, json_)); } Error Datastore::Set(const json& in) { SYNCHRONIZE(mutex_); MUST_BE_INITIALIZED; json_.update(in); - return PassError(FileStore(file_path_)); + return PassError(FileStore(paused_, file_path_, json_)); } -Error Datastore::FileLoad(const string& file_path) { - SYNCHRONIZE(mutex_); +static string FilePath(const string& file_root, const string& suffix) { + return file_root + "/psicashdatastore" + suffix; +} - json_ = json::object(); +/* +More-robust file saving will be achieved like this... + +When writing to file: +1. Write data to a new file `file_path.temp` (overwrite if exists) +2. Delete `file_path.commit`, if it exists (this should not happen, as the last read should have removed it) +3. Rename new file to `file_path.commit` +4. Delete existing `file_path` file +5. Rename `file_path.commit` to `file_path` + +When reading from file: +1. Check if `file_path.commit` exists + a. If so, delete `file_path`, if it exists + b. Rename `file_path.commit` to `file_path` +2. Read `file_path` +*/ + +static constexpr auto TEMP_EXT = ".temp"; +static constexpr auto COMMIT_EXT = ".commit"; + +static Result FileLoad(const string& file_path) { + const auto commit_file_path = file_path + COMMIT_EXT; + + // Do we have an existing commit file to promote? + if (utils::FileExists(commit_file_path)) { + int err; + if (utils::FileExists(file_path) && (err = std::remove(file_path.c_str())) != 0) { + return MakeCriticalError(utils::Stringer("removing file_path failed; err=", err, "; errno=", errno)); + } + if ((err = std::rename(commit_file_path.c_str(), file_path.c_str())) != 0) { + return MakeCriticalError(utils::Stringer("renaming commit_file_path to file_path failed; err=", err, "; errno=", errno)); + } + } ifstream f; f.open(file_path, ios::in | ios::binary); @@ -100,41 +141,80 @@ Error Datastore::FileLoad(const string& file_path) { // It seems like these state achieve approximately what we want. // For details see: https://en.cppreference.com/w/cpp/io/ios_base/iostate if (f.fail()) { - // File probably doesn't exist. Check that we can write here. - return WrapError(FileStore(file_path), "f.fail and FileStore failed"); + // File probably doesn't exist. Check that we can write here by trying to store. + auto empty_object = json::object(); + if (auto err = FileStore(false, file_path, empty_object)) { + return WrapError(err, "f.fail and FileStore failed"); + } + + // File is okay and now has empty object. + return empty_object; } else if (!f.good()) { return MakeCriticalError(utils::Stringer("not f.good; errno=", errno)); } + json json; try { - f >> json_; + f >> json; } catch (json::exception& e) { return MakeCriticalError(utils::Stringer("json load failed: ", e.what(), "; id:", e.id)); } - return nullerr; + return json; } -Error Datastore::FileStore(const string& file_path) { - SYNCHRONIZE(mutex_); - - if (paused_) { +static Error FileStore(bool paused, const string& file_path, const json& json) { + if (paused) { return nullerr; } + const auto temp_file_path = file_path + TEMP_EXT; + const auto commit_file_path = file_path + COMMIT_EXT; + + /* + Write to the temp file + */ + ofstream f; - f.open(file_path, ios::out | ios::trunc | ios::binary); + f.open(temp_file_path, ios::out | ios::trunc | ios::binary); if (!f.is_open()) { - return MakeCriticalError(utils::Stringer("not f.is_open; errno=", errno)); + return MakeCriticalError(utils::Stringer("temp_file_path not f.is_open; errno=", errno)); } try { - f << json_; + f << json; } catch (json::exception& e) { return MakeCriticalError(utils::Stringer("json dump failed: ", e.what(), "; id:", e.id)); } + f.close(); + + /* + Rename temp to commit + */ + + int err; + if (utils::FileExists(commit_file_path) && (err = std::remove(commit_file_path.c_str())) != 0) { + return MakeCriticalError(utils::Stringer("removing commit_file_path failed; err=", err, "; errno=", errno)); + } + + if ((err = std::rename(temp_file_path.c_str(), commit_file_path.c_str())) != 0) { + return MakeCriticalError(utils::Stringer("renaming temp_file_path to commit_file_path failed; err=", err, "; errno=", errno)); + } + + /* + Rename commit to datastore + */ + + if (utils::FileExists(file_path) && (err = std::remove(file_path.c_str())) != 0) { + return MakeCriticalError(utils::Stringer("removing file_path failed; err=", err, "; errno=", errno)); + } + + if ((err = std::rename(commit_file_path.c_str(), file_path.c_str())) != 0) { + return MakeCriticalError(utils::Stringer("renaming commit_file_path to file_path failed; err=", err, "; errno=", errno)); + } + return nullerr; } diff --git a/datastore.hpp b/datastore.hpp index 9dcc5ed..c095ddb 100644 --- a/datastore.hpp +++ b/datastore.hpp @@ -105,9 +105,6 @@ class Datastore { /// Helper for the public Clear methods error::Error Clear(const std::string& file_path); - error::Error FileLoad(const std::string& file_path); - error::Error FileStore(const std::string& file_path); - private: mutable std::recursive_mutex mutex_; bool initialized_; diff --git a/datastore_test.cpp b/datastore_test.cpp index 7755149..fff12d5 100644 --- a/datastore_test.cpp +++ b/datastore_test.cpp @@ -134,7 +134,7 @@ TEST_F(TestDatastore, Clear) // Clear with arguments ds = new Datastore(); err = ds->Clear(temp_dir.c_str(), ds_suffix); - ASSERT_FALSE(err); + ASSERT_FALSE(err) << err.ToString(); // First Get without calling Init; should get "not initialized" error got = ds->Get("k"); @@ -398,7 +398,7 @@ TEST_F(TestDatastore, TypeMismatch) ASSERT_FALSE(err); } -TEST_F(TestDatastore, getSimple) +TEST_F(TestDatastore, GetSimple) { Datastore ds; auto err = ds.Init(GetTempDir().c_str(), ds_suffix); @@ -413,7 +413,7 @@ TEST_F(TestDatastore, getSimple) ASSERT_EQ(*got, want); } -TEST_F(TestDatastore, getNotFound) +TEST_F(TestDatastore, GetNotFound) { Datastore ds; auto err = ds.Init(GetTempDir().c_str(), ds_suffix); @@ -432,25 +432,3 @@ TEST_F(TestDatastore, getNotFound) ASSERT_FALSE(nope); ASSERT_EQ(nope.error(), psicash::Datastore::kNotFound); } - -TEST_F(TestDatastore, clear) -{ - Datastore ds; - auto err = ds.Init(GetTempDir().c_str(), ds_suffix); - ASSERT_FALSE(err); - - string want = "v"; - err = ds.Set({{"k", want}}); - ASSERT_FALSE(err); - - auto got = ds.Get("k"); - ASSERT_TRUE(got); - ASSERT_EQ(*got, want); - - ds.Clear(); - - // There should be nothing in the datastore. - got = ds.Get("k"); - ASSERT_FALSE(got); - ASSERT_EQ(got.error(), psicash::Datastore::kNotFound); -} diff --git a/utils.cpp b/utils.cpp new file mode 100644 index 0000000..1ce51e1 --- /dev/null +++ b/utils.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019, Psiphon Inc. + * All rights reserved. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include "utils.hpp" + +#ifdef _WIN32 + #include + #define access _access_s +#else + #include +#endif + +namespace utils { + +// From https://stackoverflow.com/a/33486052 +bool FileExists(const std::string& filename) +{ + return access(filename.c_str(), 0) == 0; +} + +} diff --git a/utils.hpp b/utils.hpp index b974a58..f2df370 100644 --- a/utils.hpp +++ b/utils.hpp @@ -48,6 +48,9 @@ std::string Stringer(const T& value, const Args& ... args) { /// Synchronize the current scope using the given mutex. #define SYNCHRONIZE(m) std::unique_lock synchronize_lock(m) +/// Tests if the given filepath+name exists. +bool FileExists(const std::string& filename); + } #endif //PSICASHLIB_UTILS_H From f9fcbccbe21336f51d08546665f3017a19a5470a Mon Sep 17 00:00:00 2001 From: Adam Pritchard Date: Fri, 18 Oct 2019 14:04:53 -0400 Subject: [PATCH 2/2] Simplify/clarify file opening logic --- datastore.cpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/datastore.cpp b/datastore.cpp index 93284fd..292411b 100644 --- a/datastore.cpp +++ b/datastore.cpp @@ -133,24 +133,20 @@ static Result FileLoad(const string& file_path) { } } - ifstream f; - f.open(file_path, ios::in | ios::binary); - - // Figuring out the cause of an open-file problem (i.e., file doesn't exist vs. filesystem is - // broken) is annoying difficult to do robustly and in a cross-platform manner. - // It seems like these state achieve approximately what we want. - // For details see: https://en.cppreference.com/w/cpp/io/ios_base/iostate - if (f.fail()) { - // File probably doesn't exist. Check that we can write here by trying to store. + if (!utils::FileExists(file_path)) { + // Check that we can write here by trying to store. auto empty_object = json::object(); if (auto err = FileStore(false, file_path, empty_object)) { - return WrapError(err, "f.fail and FileStore failed"); + return WrapError(err, "file doesn't exist and FileStore failed"); } - // File is okay and now has empty object. - return empty_object; - } else if (!f.good()) { - return MakeCriticalError(utils::Stringer("not f.good; errno=", errno)); + // We'll continue on with the rest of the logic, which will read the new empty file. + } + + ifstream f; + f.open(file_path, ios::in | ios::binary); + if (!f) { + return MakeCriticalError(utils::Stringer("file open failed; errno=", errno)); } json json;