#region CPL License
/*
Nuclex Framework
Copyright (C) 2002-2009 Nuclex Development Labs

This library is free software; you can redistribute it and/or
modify it under the terms of the IBM Common Public License as
published by the IBM Corporation; either version 1.0 of the
License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
IBM Common Public License for more details.

You should have received a copy of the IBM Common Public
License along with this library
*/
#endregion

using System;
using System.Collections.Generic;
using System.Text;

using Nuclex.Support;

namespace Nuclex.Networking.Http {

  /// <summary>Parses lines in HTTP/1.1 request headers</summary>
  /// <remarks>
  ///   <para>
  ///     In order to keep the parsing process clean and readable, the HTTP request
  ///     parsing has been split into two parts. This is the low level part, a simple
  ///     line parser that efficiently extracts individual lines from an HTTP request
  ///     while honoring the guidelines from the RFC describing the HTTP protocol.
  ///     The parser is designed to be used when implementing more complex parsers
  ///     and has a specialized interface intended for the parser writers.
  ///   </para>
  ///   <para>
  ///     The correct usage is to feed it a chunk of data (of arbitrary length) through
  ///     the buffer-assignment method inherited from <see cref="LineParser" /> and then
  ///     let it chop the chunk down into lines by calling the inherited line-parsing
  ///     method repeatedly until it returns null (meaning it requires more data to
  ///     continue) or throws an exception.
  ///   </para>
  ///   <para>
  ///     You should not try to continue parsing once the line-parsing method has
  ///     thrown an exception. Providing the parser with new data is also only allowed
  ///     when it has explicitly asked for more data by returning null from the
  ///     line-parsing method. Assigning a new buffer any earlier will result in the
  ///     unprocessed data in the receive buffer not being seen by the parser.
  ///   </para>
  ///   <para>
  ///     This is so because the parser works directly on your buffer. In order to
  ///     achieve maximum efficiency, it will not copy the data into a local buffer
  ///     unless required to when it has to bridge lines that are split between two
  ///     chunks provided through the buffer-assignment method.
  ///   </para>
  /// </remarks>
  public class HttpLineParser : LineParser {

    // These constants use abbreviations to match the RFC descriptions.

    /// <summary>ASCII code for the space character</summary>
    private const byte SP = 32;
    /// <summary>ASCII code for the horizontal tab character</summary>
    private const byte HT = 9;
    /// <summary>ASCII code for the delete character</summary>
    private const byte DEL = 127;

    /// <summary>ID of the ISO-8859-1 code page</summary>
    private const int ISO_8859_1 = 28591;

    /// <summary>Encoding used to turn the bytes of a header line into a string</summary>
    /// <remarks>
    ///   Looked up once and shared so that the code page does not have to be
    ///   resolved again for every single line that is parsed.
    /// </remarks>
    private static readonly Encoding iso88591Encoding = Encoding.GetEncoding(ISO_8859_1);

    /// <summary>Initializes a new HTTP/1.1 request parser</summary>
    public HttpLineParser() : base() { }

    /// <summary>Initializes a new HTTP/1.1 request parser</summary>
    /// <param name="maximumRequestHeaderSize">
    ///   Maximum size the request header is allowed to have
    /// </param>
    public HttpLineParser(int maximumRequestHeaderSize) :
      base(maximumRequestHeaderSize) {
      this.maximumRequestHeaderSize = maximumRequestHeaderSize;
    }

    /// <summary>
    ///   Called when the message is growing beyond the maximum message size
    /// </summary>
    /// <returns>
    ///   An exception that will be thrown to indicate the too large message
    /// </returns>
    protected override Exception HandleMessageTooLarge() {
      return Errors.RequestEntityTooLarge(this.maximumRequestHeaderSize);
    }

    /// <summary>
    ///   Called when the message contains a carriage return without a line feed
    /// </summary>
    protected override void HandleLoneCarriageReturn() {
      throw Errors.BadRequest("Invalid character in request header");
    }

    /// <summary>
    ///   Called to scan the bytes of a potential line for invalid characters
    /// </summary>
    /// <param name="buffer">
    ///   Array containing the bytes to scan for invalid characters
    /// </param>
    /// <param name="start">Index in the array at which to begin reading</param>
    /// <param name="count">Number of bytes from the array to scan</param>
    /// <remarks>
    ///   Throws the exception produced by <c>Errors.BadRequest()</c> as soon as
    ///   a control character other than the horizontal tab is encountered.
    /// </remarks>
    protected override void VerifyPotentialLine(byte[] buffer, int start, int count) {

      // 'count' is a length, not an end index, so the scanned range has to be
      // calculated relative to 'start'. Comparing the index against 'count' directly
      // would skip the tail of the line (or all of it) whenever 'start' is not zero.
      int end = start + count;

      // Make sure the line does not contain any characters which are considered
      // invalid by the RFC
      for(int index = start; index < end; ++index) {
        byte current = buffer[index];

        // Control characters are everything below the space character plus DEL
        bool isControlCharacter = (current < SP) || (current == DEL);

        // The horizontal tab is the only control character that is tolerated.
        // The space character lies outside of the control character range and
        // therefore never needs to be exempted here.
        if(isControlCharacter && (current != HT)) {
          throw Errors.BadRequest("Invalid character in request header");
        }
      }

    }

    /// <summary>
    ///   Called to transform a received series of bytes into a string
    /// </summary>
    /// <param name="buffer">Buffer containing the bytes to be transformed</param>
    /// <param name="start">Index of the first byte to transform</param>
    /// <param name="count">Number of bytes to transform into a string</param>
    /// <returns>The string produced from the bytes in the specified buffer</returns>
    /// <remarks>
    ///   This method allows you to use your own encoding for transforming the bytes
    ///   in a line into a string. Always called to transform an entire line in one
    ///   piece, excluding the CR LF characters at the line's end. This implementation
    ///   decodes the bytes as ISO-8859-1.
    /// </remarks>
    protected override string TransformToString(byte[] buffer, int start, int count) {
      return iso88591Encoding.GetString(buffer, start, count);
    }

    /// <summary>Maximum size the request header is allowed to have</summary>
    /// <remarks>
    ///   NOTE(review): only assigned by the constructor taking an explicit size; the
    ///   parameterless constructor leaves it at 0, so the size reported by
    ///   <see cref="HandleMessageTooLarge" /> may not match the limit the base class
    ///   applies by default -- confirm against the base class.
    /// </remarks>
    private readonly int maximumRequestHeaderSize;

  }

} // namespace Nuclex.Networking.Http