Skip to content

Commit

Permalink
Remove file_path_format::detect
Browse files Browse the repository at this point in the history
Use `file_path_format::native` as default in the `url_from_file_path`
function.

The path format detection algorithm was simple: if the first path
character is `/`, the path is in POSIX format; otherwise it is in Windows
format. But some cases are ambiguous, e.g. the path `//h\sn\p` is
recognized as a POSIX path, but it can also be a Windows UNC path, because
`/` is allowed as a directory separator in Windows paths.
  • Loading branch information
rmisev authored Jan 15, 2024
1 parent 7842b29 commit a5426ca
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 68 deletions.
26 changes: 7 additions & 19 deletions include/upa/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -3063,11 +3063,8 @@ inline void swap(url& lhs, url& rhs) UPA_NOEXCEPT_17 {

/// @brief File path format
enum class file_path_format {
detect, ///< detect file path format from first char: '/' - POSIX, otherwise - Windows
///< (for upa::url_from_file_path, but for upa::path_from_file_url this is
///< equivalent to @a native)
posix, ///< POSIX file path format
windows, ///< Windows file path format
posix = 1, ///< POSIX file path format
windows, ///< Windows file path format
#ifdef _WIN32
native = windows ///< The file path format corresponds to the OS on which the code was compiled
#else
Expand All @@ -3092,16 +3089,15 @@ enum class file_path_format {
/// Throws url_error exception on error.
///
/// @param[in] str absolute file path string
/// @param[in] format file path format, one of upa::file_path_format::detect,
/// upa::file_path_format::posix, upa::file_path_format::windows,
/// upa::file_path_format::native
/// @param[in] format file path format, one of upa::file_path_format::posix,
/// upa::file_path_format::windows, upa::file_path_format::native
/// @return file URL
/// @see [Pathname (POSIX)](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271),
/// [realpath](https://pubs.opengroup.org/onlinepubs/9699919799/functions/realpath.html),
/// [GetFullPathName](https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew),
/// [std::filesystem::canonical](https://en.cppreference.com/w/cpp/filesystem/canonical)
template <class StrT, enable_if_str_arg_t<StrT> = 0>
inline url url_from_file_path(StrT&& str, file_path_format format = file_path_format::detect) {
inline url url_from_file_path(StrT&& str, file_path_format format = file_path_format::native) {
using CharT = str_arg_char_t<StrT>;
const auto inp = make_str_arg(std::forward<StrT>(str));
const auto* first = inp.begin();
Expand All @@ -3111,20 +3107,15 @@ inline url url_from_file_path(StrT&& str, file_path_format format = file_path_fo
throw url_error(validation_errc::file_empty_path, "Empty file path");
}

if (format == file_path_format::detect) {
format = *first == '/' ? file_path_format::posix : file_path_format::windows;
} else if (format == file_path_format::posix) {
if (*first != '/')
throw url_error(validation_errc::file_unsupported_path, "Non-absolute POSIX path");
}

const auto* pointer = first;
const auto* start_of_check = first;
const code_point_set* no_encode_set = nullptr;

std::string str_url("file://");

if (format == file_path_format::posix) {
if (*first != '/')
throw url_error(validation_errc::file_unsupported_path, "Non-absolute POSIX path");
if (detail::has_dot_dot_segment(start_of_check, last, [](CharT c) { return c == '/'; }))
throw url_error(validation_errc::file_unsupported_path, "Unsupported file path");
// Absolute POSIX path
Expand Down Expand Up @@ -3191,9 +3182,6 @@ inline std::string path_from_file_url(const url& file_url, file_path_format form
if (!file_url.is_file_scheme())
throw url_error(validation_errc::not_file_url, "Not a file URL");

if (format == upa::file_path_format::detect)
format = upa::file_path_format::native;

// source
const auto hostname = file_url.hostname();
const bool is_host = !hostname.empty();
Expand Down
110 changes: 61 additions & 49 deletions test/test-url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -611,17 +611,17 @@ TEST_CASE("detail::has_dot_dot_segment") {

TEST_CASE("url_from_file_path") {
SUBCASE("POSIX path") {
CHECK(upa::url_from_file_path("/").href() == "file:///");
CHECK(upa::url_from_file_path("/path").href() == "file:///path");
CHECK(upa::url_from_file_path("/path %#?").href() == "file:///path%20%25%23%3F");
CHECK(upa::url_from_file_path("/c:\\end").href() == "file:///c%3A%5Cend");
CHECK(upa::url_from_file_path("/c|\\end").href() == "file:///c%7C%5Cend");
CHECK(upa::url_from_file_path("/c:/last").href() == "file:///c%3A/last");
CHECK(upa::url_from_file_path("/c|/last").href() == "file:///c%7C/last");
CHECK(upa::url_from_file_path("/", upa::file_path_format::posix).href() == "file:///");
CHECK(upa::url_from_file_path("/path", upa::file_path_format::posix).href() == "file:///path");
CHECK(upa::url_from_file_path("/path %#?", upa::file_path_format::posix).href() == "file:///path%20%25%23%3F");
CHECK(upa::url_from_file_path("/c:\\end", upa::file_path_format::posix).href() == "file:///c%3A%5Cend");
CHECK(upa::url_from_file_path("/c|\\end", upa::file_path_format::posix).href() == "file:///c%7C%5Cend");
CHECK(upa::url_from_file_path("/c:/last", upa::file_path_format::posix).href() == "file:///c%3A/last");
CHECK(upa::url_from_file_path("/c|/last", upa::file_path_format::posix).href() == "file:///c%7C/last");
CHECK(upa::url_from_file_path("/\\", upa::file_path_format::posix).href() == "file:///%5C");
CHECK(upa::url_from_file_path("/..\\", upa::file_path_format::posix).href() == "file:///..%5C");
// empty path
CHECK_THROWS_AS(upa::url_from_file_path(""), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("", upa::file_path_format::posix), upa::url_error);
// non absolute path
CHECK_THROWS_AS(upa::url_from_file_path("path", upa::file_path_format::posix), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:\\path", upa::file_path_format::posix), upa::url_error);
Expand All @@ -635,61 +635,66 @@ TEST_CASE("url_from_file_path") {
}
SUBCASE("Windows path") {
// https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
CHECK(upa::url_from_file_path("C:\\").href() == "file:///C:/");
CHECK(upa::url_from_file_path("C:\\path").href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C|\\path").href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C:/path").href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C:\\path %#").href() == "file:///C:/path%20%25%23");
CHECK(upa::url_from_file_path("C:\\", upa::file_path_format::windows).href() == "file:///C:/");
CHECK(upa::url_from_file_path("C:\\path", upa::file_path_format::windows).href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C|\\path", upa::file_path_format::windows).href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C:/path", upa::file_path_format::windows).href() == "file:///C:/path");
CHECK(upa::url_from_file_path("C:\\path %#", upa::file_path_format::windows).href() == "file:///C:/path%20%25%23");
// UNC: one-character hostname
CHECK(upa::url_from_file_path("\\\\h\\path").href() == "file://h/path");
CHECK(upa::url_from_file_path("\\\\h\\a/b").href() == "file://h/a/b");
CHECK(upa::url_from_file_path("\\\\a/b\\path").href() == "file://a/b/path");
CHECK(upa::url_from_file_path("\\\\h\\path", upa::file_path_format::windows).href() == "file://h/path");
CHECK(upa::url_from_file_path("\\\\h\\a/b", upa::file_path_format::windows).href() == "file://h/a/b");
CHECK(upa::url_from_file_path("\\\\a/b\\path", upa::file_path_format::windows).href() == "file://a/b/path");
CHECK(upa::url_from_file_path("//h/path", upa::file_path_format::windows).href() == "file://h/path");
// UNC: two-character hostname and share name
CHECK(upa::url_from_file_path("\\\\ab\\xy").href() == "file://ab/xy");
CHECK(upa::url_from_file_path("\\\\ab\\xy", upa::file_path_format::windows).href() == "file://ab/xy");
// UNC: three-character hostname and share name
CHECK(upa::url_from_file_path("\\\\abc\\xyz").href() == "file://abc/xyz");
CHECK(upa::url_from_file_path("\\\\abc\\...").href() == "file://abc/...");
CHECK(upa::url_from_file_path("\\\\abc\\xyz", upa::file_path_format::windows).href() == "file://abc/xyz");
CHECK(upa::url_from_file_path("\\\\abc\\...", upa::file_path_format::windows).href() == "file://abc/...");
// UNC: IPv4 and IPv6 hostnames
CHECK(upa::url_from_file_path("\\\\127.0.0.1\\path").href() == "file://127.0.0.1/path");
CHECK(upa::url_from_file_path("\\\\[::1]\\path").href() == "file://[::1]/path");
CHECK(upa::url_from_file_path("\\\\127.0.0.1\\path", upa::file_path_format::windows).href() == "file://127.0.0.1/path");
CHECK(upa::url_from_file_path("\\\\[::1]\\path", upa::file_path_format::windows).href() == "file://[::1]/path");
// Win32 file and device namespaces
// https://learn.microsoft.com/en-us/dotnet/standard/io/file-path-formats
// https://learn.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation
CHECK(upa::url_from_file_path("\\\\?\\D:\\very_long_path").href() == "file:///D:/very_long_path");
CHECK(upa::url_from_file_path("\\\\?\\UNC\\h\\very_long_path").href() == "file://h/very_long_path");
CHECK(upa::url_from_file_path("\\\\?/unc/h/very_long_path").href() == "file://h/very_long_path");
CHECK(upa::url_from_file_path("\\\\.\\D:\\just_path").href() == "file:///D:/just_path");
CHECK(upa::url_from_file_path("\\\\.\\UNC\\h\\just_path").href() == "file://h/just_path");
CHECK(upa::url_from_file_path("\\\\./unc/h/just_path").href() == "file://h/just_path");
CHECK(upa::url_from_file_path("\\\\?\\D:\\very_long_path", upa::file_path_format::windows).href() == "file:///D:/very_long_path");
CHECK(upa::url_from_file_path("\\\\?\\UNC\\h\\very_long_path", upa::file_path_format::windows).href() == "file://h/very_long_path");
CHECK(upa::url_from_file_path("\\\\?/unc/h/very_long_path", upa::file_path_format::windows).href() == "file://h/very_long_path");
CHECK(upa::url_from_file_path("\\\\.\\D:\\just_path", upa::file_path_format::windows).href() == "file:///D:/just_path");
CHECK(upa::url_from_file_path("\\\\.\\UNC\\h\\just_path", upa::file_path_format::windows).href() == "file://h/just_path");
CHECK(upa::url_from_file_path("\\\\./unc/h/just_path", upa::file_path_format::windows).href() == "file://h/just_path");
CHECK(upa::url_from_file_path("//?/unc/h/very_long_path", upa::file_path_format::windows).href() == "file://h/very_long_path");
CHECK(upa::url_from_file_path("//./unc/h/just_path", upa::file_path_format::windows).href() == "file://h/just_path");
// empty path
CHECK_THROWS_AS(upa::url_from_file_path("", upa::file_path_format::windows), upa::url_error);
// non absolute path
CHECK_THROWS_AS(upa::url_from_file_path("\\"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:path"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("/", upa::file_path_format::windows), upa::url_error);
// invalid UNC
CHECK_THROWS_AS(upa::url_from_file_path("\\\\"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\\\"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\."), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\.."), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path(std::string{ '\\', '\\', 'h', '\\', 'a', '\0', 'b' }), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\C:\\path"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\C|\\path"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\?\\name"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\.\\name"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\."), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\.."), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\\\", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\.", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\..", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path(std::string{ '\\', '\\', 'h', '\\', 'a', '\0', 'b' },
upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\C:\\path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\C|\\path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\?\\name", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\.\\name", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\.", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\UNC\\h\\..", upa::file_path_format::windows), upa::url_error);
// UNC: invalid hostname
CHECK_THROWS_AS(upa::url_from_file_path("\\\\a b\\path"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\a?b\\path"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\a#b\\path"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\a b\\path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\a?b\\path", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\a#b\\path", upa::file_path_format::windows), upa::url_error);
// unsupported paths
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt",
upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt",
upa::file_path_format::windows), upa::url_error);
// ".." segments
CHECK_THROWS_AS(upa::url_from_file_path("C:\\..", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:\\..\\", upa::file_path_format::windows), upa::url_error);
Expand All @@ -700,6 +705,15 @@ TEST_CASE("url_from_file_path") {
// null character
CHECK_THROWS_AS(upa::url_from_file_path(std::string{ "C:\\p\0", 5 }, upa::file_path_format::windows), upa::url_error);
}
SUBCASE("Native path") {
#ifdef _WIN32
CHECK(upa::url_from_file_path("C:\\").href() == "file:///C:/");
CHECK(upa::url_from_file_path("C:\\", upa::file_path_format::native).href() == "file:///C:/");
#else
CHECK(upa::url_from_file_path("/").href() == "file:///");
CHECK(upa::url_from_file_path("/", upa::file_path_format::native).href() == "file:///");
#endif
}
}

TEST_CASE("path_from_file_url") {
Expand Down Expand Up @@ -749,11 +763,9 @@ TEST_CASE("path_from_file_url") {
#ifdef _WIN32
CHECK(path_from_file_url_1("file:///C:") == "C:\\");
CHECK(path_from_file_url("file:///C:", upa::file_path_format::native) == "C:\\");
CHECK(path_from_file_url("file:///C:", upa::file_path_format::detect) == "C:\\");
#else
CHECK(path_from_file_url_1("file:///") == "/");
CHECK(path_from_file_url("file:///", upa::file_path_format::native) == "/");
CHECK(path_from_file_url("file:///", upa::file_path_format::detect) == "/");
#endif
}
SUBCASE("Not a file URL") {
Expand Down

0 comments on commit a5426ca

Please sign in to comment.