#region CPL License
/*
Nuclex Framework
Copyright (C) 2002-2009 Nuclex Development Labs
This library is free software; you can redistribute it and/or
modify it under the terms of the IBM Common Public License as
published by the IBM Corporation; either version 1.0 of the
License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
IBM Common Public License for more details.
You should have received a copy of the IBM Common Public
License along with this library
*/
#endregion
using System;
using System.Collections.Generic;
using System.Text;
using Nuclex.Support;
namespace Nuclex.Networking.Http {
/// Parses lines in HTTP/1.1 request headers
///
///
/// In order to keep the parsing process clean and readable, the HTTP request parsing
/// has been split into two parts. This is the low level part, a simple line parser
/// that efficiently extracts individual lines from an HTTP request while honoring
/// the guidelines from the RFC describing the HTTP protocol. The parser is designed
/// to be used when implementing more complex parsers and has a specialized interface
/// intended for the parser writers.
///
///
/// The correct usage is to feed it a chunk of data (with arbitrary length) using the
/// method and then let it chop the chunk
/// down into lines by calling the method
/// repeatedly until it returns null (meaning it requires more data to continue) or
/// throws an exception.
///
///
/// You should not try to continue parsing once the line parsing
/// method has thrown an exception. Providing the parser with new data is also only
/// allowed when it has explicitly asked for more data by returning null from the
/// line parsing method. Supplying new data
/// any earlier will result in
/// the unprocessed data in the receive buffer not being seen by the parser.
///
///
/// This is so because the parser works directly on your buffer. In order to achieve
/// maximum efficiency, it will not copy the data into a local buffer unless required to
/// when it has to bridge lines that are split between two chunks provided by the
/// method.
///
///
public class HttpLineParser : LineParser {

  // These constants use abbreviations to match the RFC descriptions.

  /// <summary>ASCII code for the space character</summary>
  private const byte SP = 32;
  /// <summary>ASCII code for the horizontal tab character</summary>
  private const byte HT = 9;
  /// <summary>ASCII code for the delete character</summary>
  private const byte DEL = 127;
  /// <summary>ID of the ISO-8859-1 code page</summary>
  private const int ISO_8859_1 = 28591;

  /// <summary>Initializes a new HTTP/1.1 request parser</summary>
  /// <remarks>
  ///   NOTE(review): this constructor leaves the locally stored maximum request
  ///   header size at 0. If the base class enforces a default limit, the exception
  ///   built by <see cref="HandleMessageTooLarge" /> would be given 0 as the
  ///   limit - confirm against the base class.
  /// </remarks>
  public HttpLineParser() : base() { }

  /// <summary>Initializes a new HTTP/1.1 request parser</summary>
  /// <param name="maximumRequestHeaderSize">
  ///   Maximum size the request header is allowed to have
  /// </param>
  public HttpLineParser(int maximumRequestHeaderSize) :
    base(maximumRequestHeaderSize) {
    this.maximumRequestHeaderSize = maximumRequestHeaderSize;
  }

  /// <summary>
  ///   Called when the message is growing beyond the maximum message size
  /// </summary>
  /// <returns>
  ///   An exception that will be thrown to indicate the too large message
  /// </returns>
  protected override Exception HandleMessageTooLarge() {
    return Errors.RequestEntityTooLarge(this.maximumRequestHeaderSize);
  }

  /// <summary>
  ///   Called when the message contains a carriage return without a line feed
  /// </summary>
  protected override void HandleLoneCarriageReturn() {
    throw Errors.BadRequest("Invalid character in request header");
  }

  /// <summary>
  ///   Called to scan the bytes of a potential line for invalid characters
  /// </summary>
  /// <param name="buffer">
  ///   Array containing the bytes to scan for invalid characters
  /// </param>
  /// <param name="start">Index in the array at which to begin reading</param>
  /// <param name="count">Number of bytes from the array to scan</param>
  protected override void VerifyPotentialLine(byte[] buffer, int start, int count) {

    // 'count' is a length and not an end index, so the scan has to cover the
    // range [start, start + count). Comparing the index against 'count' alone
    // would skip part (or all) of the line whenever 'start' is greater than zero.
    int end = start + count;

    for(int index = start; index < end; ++index) {
      byte current = buffer[index];

      // Control characters are all codes below SP plus DEL. SP itself is not
      // part of that range, so the horizontal tab is the only control character
      // that is let through here.
      bool isControlCharacter = (current < SP) || (current == DEL);
      if(isControlCharacter && (current != HT)) {
        throw Errors.BadRequest("Invalid character in request header");
      }
    }

  }

  /// <summary>
  ///   Called to transform a received series of bytes into a string
  /// </summary>
  /// <param name="buffer">Buffer containing the bytes to be transformed</param>
  /// <param name="start">Index of the first byte to transform</param>
  /// <param name="count">Number of bytes to transform into a string</param>
  /// <returns>The string produced from the bytes in the specified buffer</returns>
  /// <remarks>
  ///   This method allows you to use your own encoding for transforming the bytes
  ///   in a line into a string. Always called to transform an entire line in one
  ///   piece, excluding the CR LF characters at the line's end. This implementation
  ///   decodes the bytes as ISO-8859-1.
  /// </remarks>
  protected override string TransformToString(byte[] buffer, int start, int count) {
    return iso88591Encoding.GetString(buffer, start, count);
  }

  /// <summary>
  ///   ISO-8859-1 encoding used to decode lines, looked up once instead of per line
  /// </summary>
  private static readonly Encoding iso88591Encoding = Encoding.GetEncoding(ISO_8859_1);

  /// <summary>Maximum size the request header is allowed to have</summary>
  private readonly int maximumRequestHeaderSize;

}
} // namespace Nuclex.Networking.Http