#pragma region CPL License
/*
Nuclex Native Framework
Copyright (C) 2002-2023 Nuclex Development Labs
This library is free software; you can redistribute it and/or
modify it under the terms of the IBM Common Public License as
published by the IBM Corporation; either version 1.0 of the
License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
IBM Common Public License for more details.
You should have received a copy of the IBM Common Public
License along with this library
*/
#pragma endregion // CPL License
#ifndef NUCLEX_SUPPORT_SETTINGS_INIDOCUMENTMODEL_FILEPARSER_H
#define NUCLEX_SUPPORT_SETTINGS_INIDOCUMENTMODEL_FILEPARSER_H
#include "Nuclex/Support/Config.h"
#include "IniDocumentModel.h"
// Considered allocation schemes:
//
//   - By line: lots of micro-allocations
//   - In blocks (custom allocator): I have to do reference counting to free anything
//   - Load pre-alloc, then by line: fast for the typical case, no or few micro-allocations,
//     but requires a pre-scan of the entire file plus more code
//
// The parsing itself could be done with tried-and-proven parser generators such as classic
// Flex/Yacc/Bison or Boost.Spirit. However, I wanted something lean, fast and without
// external dependencies.
//
// A middle-ground option would be a modern PEG parser generator like this:
// https://github.com/TheLartians/PEGParser
//
// But the implementation below, even if tedious, gets the job done quickly and efficiently.
//
namespace Nuclex { namespace Support { namespace Settings {
// ------------------------------------------------------------------------------------------- //
/// Builds a document model by parsing an existing .ini file
///
/// The parser works on the complete file contents held in memory. Besides filling
/// the document model, it collects a few formatting heuristics (line break style,
/// blank line padding, spaces around the equals sign) that can be queried through
/// the Uses...() methods.
/// NOTE(review): the heuristics are presumably only meaningful after ParseInto()
/// has run - confirm against the implementation.
class IniDocumentModel::FileParser {

  /// Initializes a new .ini file parser
  ///
  /// @param fileContents  Full file contents of the .ini file in memory
  /// @param byteCount     Length of the .ini file in bytes
  ///
  /// NOTE(review): the parser keeps raw pointers into the file (see fileBegin and
  /// fileEnd), so the buffer presumably has to stay alive while parsing - confirm.
  public: FileParser(const std::uint8_t *fileContents, std::size_t byteCount);

  /// Parses the .ini file and fills the specified document model
  ///
  /// @param documentModel  Document model into which the parsed properties
  ///                       will be written
  public: void ParseInto(IniDocumentModel *documentModel);

  /// Whether the parsed document used CR-LF line breaks (Windows type)
  ///
  /// @returns True if the parsed document has Windows line breaks
  public: bool UsesCarriageReturns() const { return (this->windowsLineBreaks > 0); }

  /// Whether the parsed document had blank lines between properties
  ///
  /// @returns True if the properties were padded with blank lines
  public: bool UsesBlankLines() const { return (this->blankLines > 0); }

  /// Whether the parsed document has spaces around the equals sign
  ///
  /// @returns True if the parsed document used spaces around the equals sign
  public: bool UsesSpacesAroundAssignment() const { return (this->paddedAssignments > 0); }

  /// Parses a comment, must be called on the comment start character
  private: void parseComment();

  /// Parses a property or section name, must be called on first character
  private: void parseName();

  /// Parses a property value, must be called on first character
  private: void parseValue();

  /// Parses an invalid line until the next line break
  private: void parseMalformedLine();

  /// Submits what has been parsed so far as a line
  private: void submitLine();

  /// Generates a line in which a property is declared
  ///
  /// @returns A property-declaring line filled from the current parser state
  private: PropertyLine *generatePropertyLine();

  /// Generates a line in which a section is declared
  ///
  /// @returns A section-declaring line filled from the current parser state
  private: SectionLine *generateSectionLine();

  /// Retrieves the default section or creates a new one if none exists
  private: IndexedSection *getOrCreateDefaultSection();

  /// Resets the parser state
  private: void resetState();

  /// Allocates memory for the specified line and fills its content buffer
  ///
  /// @tparam TLine      Type of line that will be allocated. Must inherit from the
  ///                    document model's line base type (presumably Line - confirm
  ///                    against IniDocumentModel.h)
  /// @param contents    Line contents that will be stored
  /// @param byteCount   Number of bytes the line is long
  /// @returns The newly allocated line with its content buffer filled
  private: template<typename TLine> TLine *allocateLineChunked(
    const std::uint8_t *contents, std::size_t byteCount
  );

  /// Allocates memory for the specified type with extra bytes
  ///
  /// @tparam T               Type for which memory will be allocated
  /// @param extraByteCount   Number of extra bytes to make available behind
  ///                         the space used by the type
  /// @returns An *uninitialized* pointer to the requested type which is followed by
  ///          the desired amount of extra bytes, aligned to the requirements of the type
  private: template<typename T> T *allocateChunked(std::size_t extraByteCount = 0);

  /// The document model this parser will fill
  private: IniDocumentModel *target;
  /// Remaining space in the current allocation chunk
  private: std::size_t remainingChunkByteCount;
  /// Section into which parsed elements go currently
  private: IndexedSection *currentSection;

  /// Pointer to the beginning of the .ini file in memory
  private: const std::uint8_t *fileBegin;
  /// Pointer one past the end of the .ini file in memory
  private: const std::uint8_t *fileEnd;
  /// Pointer to the current parsing location
  private: const std::uint8_t *parsePosition;

  /// Position at which the current line in the .ini file begins
  private: const std::uint8_t *lineStart;
  /// Position at which the current section or property's name starts
  private: const std::uint8_t *nameStart;
  /// Position one after the end of the current section or property name
  private: const std::uint8_t *nameEnd;
  /// Position at which the current property's value starts, if any
  private: const std::uint8_t *valueStart;
  /// Position one after the end of the current property's value, if any
  private: const std::uint8_t *valueEnd;

  /// Whether a section was found in the current line
  private: bool sectionFound;
  /// Whether an equals sign was found in the current line
  private: bool equalsSignFound;
  /// Whether we encountered something that breaks the current line
  private: bool lineIsMalformed;

  /// Heuristic - if positive, document uses Windows line breaks
  private: int windowsLineBreaks;
  /// Heuristic - if positive, document has blank lines between properties
  private: int blankLines;
  /// Heuristic - if positive, document puts spaces around assignment
  private: int paddedAssignments;

};
// ------------------------------------------------------------------------------------------- //
}}} // namespace Nuclex::Support::Settings
#endif // NUCLEX_SUPPORT_SETTINGS_INIDOCUMENTMODEL_FILEPARSER_H