#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2021 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License // If the library is compiled as a DLL, this ensures symbols are exported #define NUCLEX_STORAGE_SOURCE 1 #include "Nuclex/Storage/FileSystem/Path.h" #if defined(NUCLEX_STORAGE_WIN32) #include "Windows/WindowsFileApi.h" #else #include "Linux/LinuxFileApi.h" #endif #include #include // The code you will find in this file does lots of array acrobatics. This is so // that operations can be performed in the most direct way possible. To ensure // correctness, the unit test suite for this file is very thorough. namespace { // ------------------------------------------------------------------------------------------- // /// Goes up one directory level /// Path that will be changed so it is one directory level higher /// Length of the path's root part /// Directory separator that is used through the path /// /// For internal use only, assumes that the path ends with a directory separator. /// void goUpOneDir(std::string &path, std::size_t rootLength, char separator) { static const std::string up(u8".."); std::string::size_type pathLength = path.length(); // Does the path have the minimum length to contain a '../' segment? if(pathLength >= rootLength + 3) { assert( (path[pathLength - 1] == separator) && u8"Path ends with directory separator" ); // Is the lastmost path segment already a parent dir reference? if((path[pathLength - 3] == '.') && (path[pathLength - 2] == '.')) { path.append(up); path.push_back(separator); return; } } // Is the path long enough for an actual path segment? if(pathLength >= rootLength + 2) { std::string::size_type upDirIndex = path.rfind(separator, pathLength - 2); if(upDirIndex == std::string::npos) { path.clear(); } else if(upDirIndex >= rootLength) { path.resize(upDirIndex + 1); } return; } // Path was too short or contained no removable directory separators, // so go up by appending a '../' to it (this happens if someone wants to go up // from the root directory or such) path.append(up); path.push_back(separator); } // ------------------------------------------------------------------------------------------- // } // anonymous namespace namespace Nuclex { namespace Storage { namespace FileSystem { // ------------------------------------------------------------------------------------------- // // Yep, it's really that simply. Microsoft does one thing, everyone else another. #if defined(NUCLEX_STORAGE_WIN32) const std::string Path::DirectorySeparator = u8"\\"; #else const std::string Path::DirectorySeparator = u8"/"; #endif // ------------------------------------------------------------------------------------------- // const std::string Path::CurrentDirectory(u8"."); // ------------------------------------------------------------------------------------------- // const std::string Path::ParentDirectory(u8".."); // ------------------------------------------------------------------------------------------- // bool Path::IsAbsolute(const std::string &path) { std::string::size_type pathLength = path.size(); // Lets see if the path contains the double colon. In this case, it might be // a URI (http://...) or (Windows builds only) a path with a drive letter. std::string::size_type uriSchemeIndex = path.find(':'); // No double colon? Check for root-based path or UNC path (Windows builds only) if(uriSchemeIndex == std::string::npos) { #if defined(NUCLEX_STORAGE_WIN32) if(pathLength >= 1) { return ((path[0] == '\\') || (path[0] == '/')); // Root-based or UNC } #else if(pathLength >= 2) { return ( ((path[0] == '~') && (path[1] == '/')) || (path[0] == '/') ); } else if(pathLength == 1) { return ( (path[0] == '/') || // Root-based (/opt/mygame) (path[0] == '~') ); } #endif return false; } // Path with drive letter (Windows builds only) #if defined(NUCLEX_STORAGE_WIN32) if((uriSchemeIndex == 1) && (pathLength >= 3)) { return ((path[2] == '\\') || (path[2] == '/')); } #endif // URI-like paths, like file:///test or https://www.example.com. If there are // two slashes behind the URI scheme and anything follows, it's absolute. if((uriSchemeIndex >= 1) && (pathLength >= uriSchemeIndex + 4)) { // TODO: This would let something like /my/directory/mooh://test.file through. return ( (path[uriSchemeIndex + 1] == '/') && (path[uriSchemeIndex + 2] == '/') ); } return false; } // ------------------------------------------------------------------------------------------- // bool Path::IsRelative(const std::string &path) { std::string::size_type pathLength = path.size(); // Lets see if the path contains the double colon. In this case, it might be // a URI (http://...) or (Windows builds only) a path with a drive letter. std::string::size_type uriSchemeIndex = path.find(':'); // No double colon? Check for root-based path or UNC path (Windows builds only) if(uriSchemeIndex == std::string::npos) { #if defined(NUCLEX_STORAGE_WIN32) if(pathLength >= 1) { return ((path[0] != '\\') && (path[0] != '/')); // Root-based or UNC } #else if(pathLength >= 2) { return ( ((path[0] != '~') || (path[1] != '/')) && (path[0] != '/') ); } else if(pathLength == 1) { return ( (path[0] != '/') && // Root-based (/opt/mygame) (path[0] != '~') ); } #endif return true; } // Path with drive letter (Windows builds only) #if defined(NUCLEX_STORAGE_WIN32) if(uriSchemeIndex == 1) { if(pathLength >= 3) { return ((path[2] != '\\') && (path[2] != '/')); } else { return true; } } #endif // URI-like paths, like file:///test or https://www.example.com. If there are // two slashes behind the URI scheme and anything follows, it's absolute. if((uriSchemeIndex >= 1) && (pathLength >= uriSchemeIndex + 4)) { // TODO: This would let something like /my/directory/mooh://test.file through. return ( (path[uriSchemeIndex + 1] != '/') || (path[uriSchemeIndex + 2] != '/') ); } return true; } // ------------------------------------------------------------------------------------------- // std::string Path::GetStem(const std::string &path) { std::string::size_type rootLength = getStemLength(path); if(rootLength >= 1) { return path.substr(0, rootLength); } else { return std::string(); } } // ------------------------------------------------------------------------------------------- // std::string Path::GetFilename(const std::string &path, bool withExtension /* = true */) { if(withExtension) { std::string::size_type filenameStartIndex = getFilenameStartIndex(path); if(filenameStartIndex == 0) { return path; } else { return path.substr(filenameStartIndex); } } else { std::string::size_type filenameStartIndex = getFilenameStartIndex(path); std::string::size_type extensionStartIndex = path.find('.', filenameStartIndex + 1); if(extensionStartIndex == std::string::npos) { return path.substr(filenameStartIndex); } else { return path.substr(filenameStartIndex, extensionStartIndex - filenameStartIndex); } } } // ------------------------------------------------------------------------------------------- // std::string Path::GetExtension(const std::string &path) { std::string::size_type length = path.length(); if(path.length() == 0) { return std::string(); } std::string::size_type filenameStartIndex = getFilenameStartIndex(path); if(filenameStartIndex < length) { std::string::size_type extensionStart = path.find('.', filenameStartIndex + 1); if(extensionStart == std::string::npos) { return std::string(); } else { return path.substr(extensionStart + 1); } } else { return path.substr(filenameStartIndex); } } // ------------------------------------------------------------------------------------------- // std::string Path::Normalize(const std::string &path) { std::string::size_type pathLength = path.length(); // Normalizing a path can only make it shorter, never longer. std::string result; result.reserve(pathLength); // typical paths should change in length only slightly // take over the root of the path unseen std::string::size_type rootLength = getStemLength(path); if(rootLength > 0) { result.append(path, 0, rootLength); } // On Windows, we have backslash paths (normal paths like C:\Temp, UNC paths and // so on) and slash paths (file:///C:/something/test.txt) #if defined(NUCLEX_STORAGE_WIN32) char separator = (rootLength >= 3) ? '/' : '\\'; #else const char separator = '/'; #endif // This method copies the input path into an output path and flattens // any '../' and './' segments along the way. This will process UTF-8 codepoints // in single bytes, but since only '/' and '.' characters are processed individually, // all UTF-8 codepoints will remain intact. std::string::size_type startIndex = rootLength; while(startIndex < pathLength) { // Look for the next directory separator to find the next segment to process std::string::size_type nextSlashIndex = path.find('/', startIndex); #if defined(NUCLEX_STORAGE_WIN32) { std::string::size_type nextBackslashIndex = path.find('\\', startIndex); if(nextSlashIndex == std::string::npos) { nextSlashIndex = nextBackslashIndex; } else if(nextBackslashIndex != std::string::npos) { if(nextBackslashIndex < nextSlashIndex) { nextSlashIndex = nextBackslashIndex; } } } #endif // If we ran out of slashes, we have processed the entire path // (except for the last segment, if the path didn't end with a slash) if(nextSlashIndex == std::string::npos) { break; } // Decide what to do with the current path segment. It's either appended to // the output, eliminated or (in case of a '..') removes the last segment from output. if(startIndex + 2 == nextSlashIndex) { // Is it a parent directory segment? if((path[startIndex] == '.') && (path[startIndex + 1] == '.')) { goUpOneDir(result, rootLength, separator); } else { // Append if it's not a parent directory '../' segment result.append(path, startIndex, nextSlashIndex - startIndex); result.push_back(separator); } } else if(startIndex + 1 == nextSlashIndex) { // Is it a current directory segment? if(path[startIndex] != '.') { // Append if it's not a no-op './' segment result.push_back(path[startIndex]); result.push_back(separator); } } else if(startIndex != nextSlashIndex) { // Is it anything but two slashes in a row? result.append(path, startIndex, nextSlashIndex - startIndex); result.push_back(separator); } startIndex = nextSlashIndex + 1; } // while(startIndex < pathLength) // This block is similar to the one on the loop, but we know that the end // of the string has been reached and we don't want a slash at the end if(startIndex + 2 == pathLength) { if((path[startIndex] == '.') && (path[startIndex + 1] == '.')) { goUpOneDir(result, rootLength, separator); } else { result.append(path, startIndex, pathLength - startIndex); } } else if(startIndex + 1 == pathLength) { if(path[startIndex] != '.') { // Unless it's a no-op './' segment result.push_back(path[startIndex]); } } else { // startIndex cannot be equal to pathLength, so no check this time result.append(path, startIndex, pathLength - startIndex); } // Ensure the normalized path doesn't end in a directory separator pathLength = result.length(); if(pathLength > rootLength) { --pathLength; #if defined(NUCLEX_STORAGE_WIN32) if((result[pathLength] == '/') || (result[pathLength] == '\\')) { result.resize(pathLength); } #else if(result[pathLength] == '/') { result.resize(pathLength); } #endif } return result; } // ------------------------------------------------------------------------------------------- // std::string Path::Resolve(const std::string &path) { #if defined(NUCLEX_STORAGE_WIN32) return Windows::WindowsFileApi::GetFullPathName(path); #else return Linux::LinuxFileApi::RealPath(path); #endif } // ------------------------------------------------------------------------------------------- // std::string Path::Join(const std::string &base, const std::string &other) { // If the base path is empty, return the joined path alone std::size_t baseLength = base.length(); if(baseLength == 0) { return other; } // If the joined path is empty, return the base path only std::size_t otherLength = other.length(); if(otherLength == 0) { return base; } // The path is assumed to be UTF-8, but both supported directory separators fit // into a single 8-bit codepoint (they're ascii after all), so we can elegantly // check only the final character. #if defined(NUCLEX_STORAGE_WIN32) if(base[baseLength - 1] == '\\') { return base + other; } else { std::string result; result.reserve(base.length() + 1 + other.length()); result.append(base); result.push_back('\\'); result.append(other); return result; } #else if(base[baseLength - 1] == '/') { return base + other; } else { std::string result; result.reserve(base.length() + 1 + other.length()); result.append(base); result.push_back('/'); result.append(other); return result; } #endif } // ------------------------------------------------------------------------------------------- // std::string Path::Combine(const std::string &base, const std::string &other) { std::size_t baseLength = base.length(); if(baseLength == 0) { return other; } std::size_t otherLength = other.length(); if(otherLength == 0) { return base; } if(IsAbsolute(other)) { return Normalize(other); } else { return Normalize(Join(base, other)); } } // ------------------------------------------------------------------------------------------- // std::string::size_type Path::getStemLength(const std::string &path) { std::string::size_type pathLength = path.size(); // See if the path starts with a directory separator (which makes it an // absolute path) if(pathLength >= 1) { if(path[0] == '/') { return 1; // Absolute path } #if defined(NUCLEX_STORAGE_WIN32) if(path[0] == '\\') { return 1; // Absolute path (Windows format) } #endif } else { // Path has no length return 0; } // On non-Windows systems, a path starting with ~/ is also absolute and // points to a file or directory starting from the user's home directory #if !defined(NUCLEX_STORAGE_WIN32) if(pathLength >= 2) { if((path[0] == '~') && (path[1] == '/')) { return 2; // Path starting at home directory } } #endif // Look for a double colon. This indicates either a drive letter (Windows only) // or a URI-like path (i.e. file:///thefile) std::string::size_type uriSchemeIndex = path.find(':'); if(uriSchemeIndex != std::string::npos) { #if defined(NUCLEX_STORAGE_WIN32) if(uriSchemeIndex == 1) { if(pathLength == 2) { return 2; // Drive letter only } else if(pathLength >= 3) { if((path[2] == '/') || (path[2] == '\\')) { return 3; // Drive letter with absolute path } else { return 2; // Drive letter with relative path } } } #endif // Does the path look like an URI? (something:///more) if(pathLength >= uriSchemeIndex + 5) { if( (path[uriSchemeIndex + 1] == '/') && (path[uriSchemeIndex + 2] == '/') ) { if(path[uriSchemeIndex + 3] == '/') { return uriSchemeIndex + 4; // URI with root slash } else { return uriSchemeIndex + 3; // URI without root slash } } } else if(pathLength >= uriSchemeIndex + 4) { if( (path[uriSchemeIndex + 1] == '/') && (path[uriSchemeIndex + 2] == '/') ) { return uriSchemeIndex + 3; // URI without root slash and no file/directory } } } // Path has no root and is entirely relative (i.e. going up will eventually // remove all its components) return 0; } // ------------------------------------------------------------------------------------------- // std::string::size_type Path::getFilenameStartIndex(const std::string &path) { std::string::size_type pathLength = path.size(); if(pathLength == 0) { return 0; } // Look for the last directory separator in the path. // If this is a Windows build, also consider backslashes. std::string::size_type lastSlashIndex = path.rfind('/'); #if defined(NUCLEX_STORAGE_WIN32) if(lastSlashIndex == std::string::npos) { lastSlashIndex = path.rfind('\\'); } else { std::string::size_type lastBackslashIndex = path.rfind('\\'); if(lastBackslashIndex != std::string::npos) { if(lastBackslashIndex > lastSlashIndex) { lastSlashIndex = lastBackslashIndex; } } } #endif // Now pick everything after the last directory separator, // but not before the path's root std::string::size_type rootLength = getStemLength(path); if(lastSlashIndex == std::string::npos) { return rootLength; } else { return std::max(rootLength, lastSlashIndex + 1); } } // ------------------------------------------------------------------------------------------- // }}} // namespace Nuclex::Storage::FileSystem