From a5426ca70e61f62f28e4de6d83cae512ed306fe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rimas=20Misevi=C4=8Dius?= Date: Mon, 15 Jan 2024 20:27:17 +0200 Subject: [PATCH] Remove file_path_format::detect Use `file_path_format::native` as the default in the `url_from_file_path` function. The path format detection algorithm was simple: if the first path character is `/`, then it is POSIX format; otherwise it is Windows format. But in some cases this is ambiguous, e.g. the path `//h\sn\p` is recognized as a POSIX path, but it can also be a Windows UNC path, because `/` is allowed as a directory separator in Windows paths. --- include/upa/url.h | 26 +++-------- test/test-url.cpp | 110 +++++++++++++++++++++++++--------------------- 2 files changed, 68 insertions(+), 68 deletions(-) diff --git a/include/upa/url.h b/include/upa/url.h index 11be131..cae8a62 100644 --- a/include/upa/url.h +++ b/include/upa/url.h @@ -3063,11 +3063,8 @@ inline void swap(url& lhs, url& rhs) UPA_NOEXCEPT_17 { /// @brief File path format enum class file_path_format { - detect, ///< detect file path format from first char: '/' - POSIX, otherwise - Windows - ///< (for upa::url_from_file_path, but for upa::path_from_file_url this is - ///< equivalent to @a native) - posix, ///< POSIX file path format - windows, ///< Windows file path format + posix = 1, ///< POSIX file path format + windows, ///< Windows file path format #ifdef _WIN32 native = windows ///< The file path format corresponds to the OS on which the code was compiled #else @@ -3092,16 +3089,15 @@ enum class file_path_format { /// Throws url_error exception on error. 
/// /// @param[in] str absolute file path string -/// @param[in] format file path format, one of upa::file_path_format::detect, -/// upa::file_path_format::posix, upa::file_path_format::windows, -/// upa::file_path_format::native +/// @param[in] format file path format, one of upa::file_path_format::posix, +/// upa::file_path_format::windows, upa::file_path_format::native /// @return file URL /// @see [Pathname (POSIX)](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271), /// [realpath](https://pubs.opengroup.org/onlinepubs/9699919799/functions/realpath.html), /// [GetFullPathName](https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew), /// [std::filesystem::canonical](https://en.cppreference.com/w/cpp/filesystem/canonical) template = 0> -inline url url_from_file_path(StrT&& str, file_path_format format = file_path_format::detect) { +inline url url_from_file_path(StrT&& str, file_path_format format = file_path_format::native) { using CharT = str_arg_char_t; const auto inp = make_str_arg(std::forward(str)); const auto* first = inp.begin(); @@ -3111,13 +3107,6 @@ inline url url_from_file_path(StrT&& str, file_path_format format = file_path_fo throw url_error(validation_errc::file_empty_path, "Empty file path"); } - if (format == file_path_format::detect) { - format = *first == '/' ? 
file_path_format::posix : file_path_format::windows; - } else if (format == file_path_format::posix) { - if (*first != '/') - throw url_error(validation_errc::file_unsupported_path, "Non-absolute POSIX path"); - } - const auto* pointer = first; const auto* start_of_check = first; const code_point_set* no_encode_set = nullptr; @@ -3125,6 +3114,8 @@ inline url url_from_file_path(StrT&& str, file_path_format format = file_path_fo std::string str_url("file://"); if (format == file_path_format::posix) { + if (*first != '/') + throw url_error(validation_errc::file_unsupported_path, "Non-absolute POSIX path"); if (detail::has_dot_dot_segment(start_of_check, last, [](CharT c) { return c == '/'; })) throw url_error(validation_errc::file_unsupported_path, "Unsupported file path"); // Absolute POSIX path @@ -3191,9 +3182,6 @@ inline std::string path_from_file_url(const url& file_url, file_path_format form if (!file_url.is_file_scheme()) throw url_error(validation_errc::not_file_url, "Not a file URL"); - if (format == upa::file_path_format::detect) - format = upa::file_path_format::native; - // source const auto hostname = file_url.hostname(); const bool is_host = !hostname.empty(); diff --git a/test/test-url.cpp b/test/test-url.cpp index b8333d4..7e487bc 100644 --- a/test/test-url.cpp +++ b/test/test-url.cpp @@ -611,17 +611,17 @@ TEST_CASE("detail::has_dot_dot_segment") { TEST_CASE("url_from_file_path") { SUBCASE("POSIX path") { - CHECK(upa::url_from_file_path("/").href() == "file:///"); - CHECK(upa::url_from_file_path("/path").href() == "file:///path"); - CHECK(upa::url_from_file_path("/path %#?").href() == "file:///path%20%25%23%3F"); - CHECK(upa::url_from_file_path("/c:\\end").href() == "file:///c%3A%5Cend"); - CHECK(upa::url_from_file_path("/c|\\end").href() == "file:///c%7C%5Cend"); - CHECK(upa::url_from_file_path("/c:/last").href() == "file:///c%3A/last"); - CHECK(upa::url_from_file_path("/c|/last").href() == "file:///c%7C/last"); + CHECK(upa::url_from_file_path("/", 
upa::file_path_format::posix).href() == "file:///"); + CHECK(upa::url_from_file_path("/path", upa::file_path_format::posix).href() == "file:///path"); + CHECK(upa::url_from_file_path("/path %#?", upa::file_path_format::posix).href() == "file:///path%20%25%23%3F"); + CHECK(upa::url_from_file_path("/c:\\end", upa::file_path_format::posix).href() == "file:///c%3A%5Cend"); + CHECK(upa::url_from_file_path("/c|\\end", upa::file_path_format::posix).href() == "file:///c%7C%5Cend"); + CHECK(upa::url_from_file_path("/c:/last", upa::file_path_format::posix).href() == "file:///c%3A/last"); + CHECK(upa::url_from_file_path("/c|/last", upa::file_path_format::posix).href() == "file:///c%7C/last"); CHECK(upa::url_from_file_path("/\\", upa::file_path_format::posix).href() == "file:///%5C"); CHECK(upa::url_from_file_path("/..\\", upa::file_path_format::posix).href() == "file:///..%5C"); // empty path - CHECK_THROWS_AS(upa::url_from_file_path(""), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("", upa::file_path_format::posix), upa::url_error); // non absolute path CHECK_THROWS_AS(upa::url_from_file_path("path", upa::file_path_format::posix), upa::url_error); CHECK_THROWS_AS(upa::url_from_file_path("C:\\path", upa::file_path_format::posix), upa::url_error); @@ -635,61 +635,66 @@ TEST_CASE("url_from_file_path") { } SUBCASE("Windows path") { // https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file - CHECK(upa::url_from_file_path("C:\\").href() == "file:///C:/"); - CHECK(upa::url_from_file_path("C:\\path").href() == "file:///C:/path"); - CHECK(upa::url_from_file_path("C|\\path").href() == "file:///C:/path"); - CHECK(upa::url_from_file_path("C:/path").href() == "file:///C:/path"); - CHECK(upa::url_from_file_path("C:\\path %#").href() == "file:///C:/path%20%25%23"); + CHECK(upa::url_from_file_path("C:\\", upa::file_path_format::windows).href() == "file:///C:/"); + CHECK(upa::url_from_file_path("C:\\path", upa::file_path_format::windows).href() == 
"file:///C:/path"); + CHECK(upa::url_from_file_path("C|\\path", upa::file_path_format::windows).href() == "file:///C:/path"); + CHECK(upa::url_from_file_path("C:/path", upa::file_path_format::windows).href() == "file:///C:/path"); + CHECK(upa::url_from_file_path("C:\\path %#", upa::file_path_format::windows).href() == "file:///C:/path%20%25%23"); // UNC: one-character hostname - CHECK(upa::url_from_file_path("\\\\h\\path").href() == "file://h/path"); - CHECK(upa::url_from_file_path("\\\\h\\a/b").href() == "file://h/a/b"); - CHECK(upa::url_from_file_path("\\\\a/b\\path").href() == "file://a/b/path"); + CHECK(upa::url_from_file_path("\\\\h\\path", upa::file_path_format::windows).href() == "file://h/path"); + CHECK(upa::url_from_file_path("\\\\h\\a/b", upa::file_path_format::windows).href() == "file://h/a/b"); + CHECK(upa::url_from_file_path("\\\\a/b\\path", upa::file_path_format::windows).href() == "file://a/b/path"); CHECK(upa::url_from_file_path("//h/path", upa::file_path_format::windows).href() == "file://h/path"); // UNC: two-character hostname and share name - CHECK(upa::url_from_file_path("\\\\ab\\xy").href() == "file://ab/xy"); + CHECK(upa::url_from_file_path("\\\\ab\\xy", upa::file_path_format::windows).href() == "file://ab/xy"); // UNC: three-character hostname and share name - CHECK(upa::url_from_file_path("\\\\abc\\xyz").href() == "file://abc/xyz"); - CHECK(upa::url_from_file_path("\\\\abc\\...").href() == "file://abc/..."); + CHECK(upa::url_from_file_path("\\\\abc\\xyz", upa::file_path_format::windows).href() == "file://abc/xyz"); + CHECK(upa::url_from_file_path("\\\\abc\\...", upa::file_path_format::windows).href() == "file://abc/..."); // UNC: IPv4 and IPv6 hostnames - CHECK(upa::url_from_file_path("\\\\127.0.0.1\\path").href() == "file://127.0.0.1/path"); - CHECK(upa::url_from_file_path("\\\\[::1]\\path").href() == "file://[::1]/path"); + CHECK(upa::url_from_file_path("\\\\127.0.0.1\\path", upa::file_path_format::windows).href() == 
"file://127.0.0.1/path"); + CHECK(upa::url_from_file_path("\\\\[::1]\\path", upa::file_path_format::windows).href() == "file://[::1]/path"); // Win32 file and device namespaces // https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats // https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation - CHECK(upa::url_from_file_path("\\\\?\\D:\\very_long_path").href() == "file:///D:/very_long_path"); - CHECK(upa::url_from_file_path("\\\\?\\UNC\\h\\very_long_path").href() == "file://h/very_long_path"); - CHECK(upa::url_from_file_path("\\\\?/unc/h/very_long_path").href() == "file://h/very_long_path"); - CHECK(upa::url_from_file_path("\\\\.\\D:\\just_path").href() == "file:///D:/just_path"); - CHECK(upa::url_from_file_path("\\\\.\\UNC\\h\\just_path").href() == "file://h/just_path"); - CHECK(upa::url_from_file_path("\\\\./unc/h/just_path").href() == "file://h/just_path"); + CHECK(upa::url_from_file_path("\\\\?\\D:\\very_long_path", upa::file_path_format::windows).href() == "file:///D:/very_long_path"); + CHECK(upa::url_from_file_path("\\\\?\\UNC\\h\\very_long_path", upa::file_path_format::windows).href() == "file://h/very_long_path"); + CHECK(upa::url_from_file_path("\\\\?/unc/h/very_long_path", upa::file_path_format::windows).href() == "file://h/very_long_path"); + CHECK(upa::url_from_file_path("\\\\.\\D:\\just_path", upa::file_path_format::windows).href() == "file:///D:/just_path"); + CHECK(upa::url_from_file_path("\\\\.\\UNC\\h\\just_path", upa::file_path_format::windows).href() == "file://h/just_path"); + CHECK(upa::url_from_file_path("\\\\./unc/h/just_path", upa::file_path_format::windows).href() == "file://h/just_path"); CHECK(upa::url_from_file_path("//?/unc/h/very_long_path", upa::file_path_format::windows).href() == "file://h/very_long_path"); CHECK(upa::url_from_file_path("//./unc/h/just_path", upa::file_path_format::windows).href() == "file://h/just_path"); + // empty path + CHECK_THROWS_AS(upa::url_from_file_path("", 
upa::file_path_format::windows), upa::url_error); // non absolute path - CHECK_THROWS_AS(upa::url_from_file_path("\\"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("C:path"), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("C:path", upa::file_path_format::windows), upa::url_error); CHECK_THROWS_AS(upa::url_from_file_path("path", upa::file_path_format::windows), upa::url_error); CHECK_THROWS_AS(upa::url_from_file_path("/", upa::file_path_format::windows), upa::url_error); // invalid UNC - CHECK_THROWS_AS(upa::url_from_file_path("\\\\"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\h"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\\\"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\."), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\.."), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path(std::string{ '\\', '\\', 'h', '\\', 'a', '\0', 'b' }), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\C:\\path"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\C|\\path"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\?\\name"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\.\\name"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\."), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\.."), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\h", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\\\", 
upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\.", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\..", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path(std::string{ '\\', '\\', 'h', '\\', 'a', '\0', 'b' }, + upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\C:\\path", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\C|\\path", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\?\\name", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\.\\name", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\.", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\..", upa::file_path_format::windows), upa::url_error); // UNC: invalid hostname - CHECK_THROWS_AS(upa::url_from_file_path("\\\\a b\\path"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\a?b\\path"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\a#b\\path"), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\a b\\path", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\a?b\\path", upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\a#b\\path", upa::file_path_format::windows), upa::url_error); // unsupported pathes - CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"), upa::url_error); - CHECK_THROWS_AS(upa::url_from_file_path("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"), upa::url_error); + 
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt", + upa::file_path_format::windows), upa::url_error); + CHECK_THROWS_AS(upa::url_from_file_path("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt", + upa::file_path_format::windows), upa::url_error); // ".." segments CHECK_THROWS_AS(upa::url_from_file_path("C:\\..", upa::file_path_format::windows), upa::url_error); CHECK_THROWS_AS(upa::url_from_file_path("C:\\..\\", upa::file_path_format::windows), upa::url_error); @@ -700,6 +705,15 @@ TEST_CASE("url_from_file_path") { // null character CHECK_THROWS_AS(upa::url_from_file_path(std::string{ "C:\\p\0", 5 }, upa::file_path_format::windows), upa::url_error); } + SUBCASE("Native path") { +#ifdef _WIN32 + CHECK(upa::url_from_file_path("C:\\").href() == "file:///C:/"); + CHECK(upa::url_from_file_path("C:\\", upa::file_path_format::native).href() == "file:///C:/"); +#else + CHECK(upa::url_from_file_path("/").href() == "file:///"); + CHECK(upa::url_from_file_path("/", upa::file_path_format::native).href() == "file:///"); +#endif + } } TEST_CASE("path_from_file_url") { @@ -749,11 +763,9 @@ TEST_CASE("path_from_file_url") { #ifdef _WIN32 CHECK(path_from_file_url_1("file:///C:") == "C:\\"); CHECK(path_from_file_url("file:///C:", upa::file_path_format::native) == "C:\\"); - CHECK(path_from_file_url("file:///C:", upa::file_path_format::detect) == "C:\\"); #else CHECK(path_from_file_url_1("file:///") == "/"); CHECK(path_from_file_url("file:///", upa::file_path_format::native) == "/"); - CHECK(path_from_file_url("file:///", upa::file_path_format::detect) == "/"); #endif } SUBCASE("Not a file URL") {