#pragma region CPL License /* Nuclex Native Framework Copyright (C) 2002-2023 Nuclex Development Labs This library is free software; you can redistribute it and/or modify it under the terms of the IBM Common Public License as published by the IBM Corporation; either version 1.0 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the IBM Common Public License for more details. You should have received a copy of the IBM Common Public License along with this library */ #pragma endregion // CPL License #ifndef NUCLEX_SUPPORT_SETTINGS_INIDOCUMENTMODEL_FILEPARSER_H #define NUCLEX_SUPPORT_SETTINGS_INIDOCUMENTMODEL_FILEPARSER_H #include "Nuclex/Support/Config.h" #include "IniDocumentModel.h" // Considered allocation schemes: // // By line -> lots of micro-allocations // In blocks (custom allocator) -> I have to do reference counting to free anything // Load pre-alloc, then by line -> Fast for typical case, no or few micro-allocations // But requires pre-scan of entire file + more code // This could be done with tried-and-proven parser generators such as classic Flex/Yacc/Bison // or Boost.Spirit. However, I wanted something lean, fast and without external dependencies. // // A middleground option would be a modern PEG parser generator like this: // https://github.com/TheLartians/PEGParser // // But the implementation below, even if tedious, gets the job done, fast and efficient. // namespace Nuclex { namespace Support { namespace Settings { // ------------------------------------------------------------------------------------------- // /// Builds a document model by parses an existing .ini file class IniDocumentModel::FileParser { /// Initializes a new .ini file parser /// Full file contents of the .ini file in memory /// Length of the .ini file in bytes public: FileParser(const std::uint8_t *fileContents, std::size_t byteCount); /// Parses the .ini file and fills the specified document model /// /// Document model into which the parsed properties will be written /// public: void ParseInto(IniDocumentModel *documentModel); /// Whether the parsed document used CR-LF line breaks (Windows type) /// True if the parsed document has Windows line breaks public: bool UsesCarriageReturns() const { return (this->windowsLineBreaks > 0); } /// Whether the parsed document had blank lines between properties /// True if the properties were padded with blank lines public: bool UsesBlankLines() const { return (this->blankLines > 0); } /// Whether the parsed document has spaces around the equals sign /// True if the parsed document used spaces around the equals sign public: bool UsesSpacesAroundAssignment() const { return (this->paddedAssignments > 0); } /// Parses a comment, must be called on the comment start character private: void parseComment(); /// Parses a property or section name, must be called on first character private: void parseName(); /// Parses a property value, must be called on first character private: void parseValue(); /// Parses an invalid line until the next line break private: void parseMalformedLine(); /// Submits was has been parsed so far as a line private: void submitLine(); /// Generates a line in which a property is declared /// A property-declaring line filled from the current parser state private: PropertyLine *generatePropertyLine(); /// Generates a line in which a section is declared /// A section-declaring line filled from the current parser state private: SectionLine *generateSectionLine(); /// Retrieves the default section or create a new one if none exists private: IndexedSection *getOrCreateDefaultSection(); /// Resets the parser state private: void resetState(); /// Allocates memory for the specified line and fills its content buffer /// /// Type of line that will be allocated. Must inherit from the type /// /// Line contents that will be stored /// Number of bytes the line long /// The newly allocated line with its content buffer filled private: template TLine *allocateLineChunked( const std::uint8_t *contents, std::size_t byteCount ); /// Allocates memory for the specified type with extra bytes /// Type for which memory will be allocated /// /// Number of extra bytes to make available behind the space used by the type /// /// /// An *uninitialized* pointer to the requested type which is followed by /// the desired amount of extra bytes, aligned to the requirements of the type /// private: template T *allocateChunked(std::size_t extraByteCount = 0); /// The document model into this parser will fill private: IniDocumentModel *target; /// Remaining space in the current allocation chunk private: std::size_t remainingChunkByteCount; /// Section into which parsed elements go currently private: IndexedSection *currentSection; /// Pointer to the beginning of the .ini file in memory private: const std::uint8_t *fileBegin; /// Pointer one past the end of the .ini file in memory private: const std::uint8_t *fileEnd; /// Pointer to the current parsing location private: const std::uint8_t *parsePosition; /// Position at which the current line in the .ini file begins private: const std::uint8_t *lineStart; /// Position at which the current section or property's name starts private: const std::uint8_t *nameStart; /// Position one after the end of the current section or property name private: const std::uint8_t *nameEnd; /// Position at which the current property's value starts, if any private: const std::uint8_t *valueStart; /// Position one after the end of the current property's value, if any private: const std::uint8_t *valueEnd; /// Whether a section was found in the current line private: bool sectionFound; /// Whether an equals sign was found in the current line private: bool equalsSignFound; /// Whether we encountered something that breaks the current line private: bool lineIsMalformed; /// Heuristic - if positive, document uses Windows line breaks private: int windowsLineBreaks; /// Heuristic - if positive, document has blank lines between properties private: int blankLines; /// Heuristic - if positive, document puts spaces around assignment private: int paddedAssignments; }; // ------------------------------------------------------------------------------------------- // }}} // namespace Nuclex::Support::Settings #endif // NUCLEX_SUPPORT_SETTINGS_INIDOCUMENTMODEL_FILEPARSER_H