#region CPL License
/*
Nuclex Framework
Copyright (C) 2002-2009 Nuclex Development Labs
This library is free software; you can redistribute it and/or
modify it under the terms of the IBM Common Public License as
published by the IBM Corporation; either version 1.0 of the
License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
IBM Common Public License for more details.
You should have received a copy of the IBM Common Public
License along with this library
*/
#endregion
using System;
using System.Collections.Generic;
using Nuclex.Support;
namespace Nuclex.Networking.Http {
/// <summary>Parses HTTP/1.1 requests</summary>
/// <remarks>
///   <para>
///     This is a high-performance and low-garbage HTTP request parser that can be
///     fed with incoming data incrementally. The parser is designed to be bullet-proof
///     and will not run into an undefined state no matter what data it is given. It
///     will not look at a single byte more than the maximum allowed request header
///     size you specify, thereby also making it resilient to memory load attacks.
///   </para>
///   <para>
///     Parsing takes place directly on the data you provide the parser with, avoiding
///     expensive memory copies and string conversion/splitting operations. This makes
///     it ideal for usage in systems with limited memory or a cheaper implementation
///     of the .NET garbage collection and ensures good scalability on other systems.
///   </para>
/// </remarks>
internal class RequestParser : HttpLineParser {
#region enum ParserState
/// <summary>States the request parser can be in</summary>
/// <remarks>
///   The parser starts in <see cref="AwaitingRequestLineOrCrLf" /> (set by Reset())
///   and advances through the states as ProcessBytes() consumes lines.
/// </remarks>
private enum ParserState {
/// <summary>
///   Waiting for the request line or a CR-LF to be sent. This is the initial
///   state; a single empty line moves the parser to
///   <see cref="AwaitingRequestLine" />, any other line is treated as the
///   request line.
/// </summary>
AwaitingRequestLineOrCrLf,
/// <summary>
///   Waiting for the request line to be sent (an empty line has already
///   been consumed, so the next line is parsed as the request line)
/// </summary>
AwaitingRequestLine,
/// <summary>
///   Waiting for additional headers to be sent; an empty line in this state
///   ends the request header
/// </summary>
AwaitingRequestHeaderOrEnd,
/// <summary>
///   Waiting for the request's data to be transmitted. Entered once the request
///   header is complete; ProcessBytes() returns null without parsing while the
///   parser is in this state.
/// </summary>
AwaitingRequestData
}
#endregion // enum ParserState
/// <summary>Initializes a new HTTP request parser</summary>
/// <param name="maximumRequestHeaderSize">
///   Maximum length the HTTP request header is allowed to have; passed on
///   unchanged to the underlying line parser
/// </param>
public RequestParser(int maximumRequestHeaderSize)
  : base(maximumRequestHeaderSize) {

  // The builder has to exist before Reset() runs because Reset() clears it
  requestBuilder = new RequestBuilder();

  // Put this parser and the line parser beneath it into their initial state
  Reset();

}
/// <summary>Resets the parser to the initial state for a new request</summary>
/// <remarks>
///   This method hides the Reset() method of the base class and invokes it after
///   clearing the request-specific state (parser state, the header field name
///   remembered for continuation lines and the request builder).
/// </remarks>
public new void Reset() {
  this.state = ParserState.AwaitingRequestLineOrCrLf;

  // Forget the header field of the previous request so a reset parser carries
  // no state over from the request it was working on before
  this.currentFieldName = null;

  this.requestBuilder.Reset();
  base.Reset();
}
/// <summary>Instructs the parser to process the provided bytes</summary>
/// <param name="buffer">Buffer containing the bytes that will be parsed</param>
/// <param name="start">Index in the buffer at which to start reading</param>
/// <param name="count">Number of bytes that will be parsed</param>
/// <returns>
///   The parsed HTTP request if a complete request could be constructed from the
///   provided bytes, null if more data is required
/// </returns>
/// <exception cref="InvalidOperationException">
///   The parser found itself in a state it has no handling for (internal error)
/// </exception>
/// <remarks>
///   <para>
///     When a request is returned, there may be leftover data in the buffer that was
///     following the request. This data can either be another request sent by the
///     client or uploaded data for an HTTP POST request.
///   </para>
///   <para>
///     You should always extract the leftover data as soon as a request is returned
///     using the <see cref="GetRemainingData" /> method and buffer it until you know
///     (from the request header) what is to be done with it. If the request doesn't
///     involve any uploaded data, you can feed it back to the request parser after
///     calling the <see cref="Reset" /> method once (or creating a new parser).
///   </para>
///   <para>
///     After a request has been returned, the parser stays in its "awaiting request
///     data" state: any further call returns null without handing the bytes to the
///     line parser until <see cref="Reset" /> is called. Malformed request lines or
///     header lines are reported through the exceptions created by the Errors
///     helper (bad request, unsupported protocol version).
///   </para>
/// </remarks>
public Request ProcessBytes(byte[] buffer, int start, int count) {

  // If the request is complete, assume all incoming data is additional data
  // appended to the request (eg. HTTP POST). It is not ours to parse.
  if(this.state == ParserState.AwaitingRequestData) {
    return null;
  }

  // Hand the received data over to the underlying line parser
  SetReceivedData(buffer, start, count);

  for(; ; ) {

    // Let the line parser extract the next line from the data
    string line = base.ParseLine();

    // If we need more data to complete the current line, exit the loop and
    // wait for ProcessBytes() to be called again
    if(line == null) {
      return null;
    }

    switch(this.state) {

      // We're awaiting either the request line or an empty line. The RFC says that
      // servers should handle a single CR-LF being sent before the request line.
      case ParserState.AwaitingRequestLineOrCrLf: {
        if(line == string.Empty) {
          this.state = ParserState.AwaitingRequestLine;
          break;
        }

        // Not an empty line, so it has to be the request line itself
        goto case ParserState.AwaitingRequestLine;
      }

      // We either received an empty line already or the first line sent to us was
      // non-empty, so now expect the client to follow with the request line.
      case ParserState.AwaitingRequestLine: {
        parseRequestLine(line);

        // No header field has been seen yet, thus a continuation line arriving
        // right after the request line has nothing it could continue
        this.currentFieldName = null;
        this.state = ParserState.AwaitingRequestHeaderOrEnd;
        break;
      }

      // Request line has been received and we're now waiting for additional request
      // header lines or the end of the request header
      case ParserState.AwaitingRequestHeaderOrEnd: {

        // An empty line terminates the header: the request is complete
        if(line == string.Empty) {
          this.state = ParserState.AwaitingRequestData;
          return this.requestBuilder.BuildRequest();
        }

        parseHeaderLine(line);
        break;
      }

      // Cannot be reached unless a new state is added without handling it here.
      // InvalidOperationException is used instead of the bare Exception type so
      // the failure is distinguishable while still being caught as an Exception.
      default: {
        throw new InvalidOperationException("Internal error: invalid parser state");
      }

    }

  } // for(;;)

}
/// <summary>The remaining bytes the parser has not yet processed</summary>
/// <returns>
///   The remaining data as reported by the GetRemainingData() method of the
///   underlying line parser
/// </returns>
/// <remarks>
///   After a request is complete, normally, all bytes in the receive buffer should
///   have been processed. If the request was a HTTP POST request, however, the client
///   might begin sending the data immediately after the request header. In this case,
///   you will have to take back the remaining, unparsed bytes from the parser
///   after the complete request has been parsed.
/// </remarks>
public new ArraySegment<byte> GetRemainingData() {
  // ArraySegment only exists as a generic type; the element type is byte because
  // the parser operates on the byte[] buffers given to ProcessBytes().
  // NOTE(review): must match the return type of the base class method - confirm.
  return base.GetRemainingData();
}
/// <summary>Parses the request line sent from the client</summary>
/// <param name="requestLine">String containing the received request line</param>
/// <remarks>
///   Splits the line into method, URI and protocol version and stores the three
///   parts in the request builder. A line without any space, or with only a single
///   space, is rejected as a bad request; a version that does not begin with
///   "HTTP/1." is rejected as an unsupported protocol version. Both failures are
///   reported by throwing what the Errors helper returns.
/// </remarks>
private void parseRequestLine(string requestLine) {

  // The RFC doesn't say that the request line must not contain any additional
  // spaces, so we will assume the first space terminates the method and the
  // last space terminates the URI.
  int uriDelimiterIndex = requestLine.IndexOf(' ');
  if(uriDelimiterIndex == -1) {
    throw Errors.BadRequest("Request-line is missing an URI");
  }

  // If there's only one space character, then the request is missing the version
  // of the HTTP protocol used.
  int versionDelimiterIndex = requestLine.LastIndexOf(' ');
  if(versionDelimiterIndex == uriDelimiterIndex) {
    throw Errors.BadRequest("Request-line does not specify HTTP version");
  }

  // Request seems to at least be in the right layout. Extract the individual
  // components and pass them to the request container builder (validation of
  // the actual settings takes place once we have a complete request).
  this.requestBuilder.Method = requestLine.Substring(0, uriDelimiterIndex);
  this.requestBuilder.Uri = requestLine.Substring(
    uriDelimiterIndex + 1, versionDelimiterIndex - uriDelimiterIndex - 1
  );
  this.requestBuilder.Version = requestLine.Substring(versionDelimiterIndex + 1);

  // We expect HTTP/1.* to stay compatible with the general format of the request.
  // Any other version of the protocol may include major changes to the request
  // format, thus we only accept HTTP/1.*. The comparison is ordinal because this
  // is a protocol token: a culture-sensitive comparison depends on the current
  // culture and may skip characters it considers ignorable.
  bool isHttp1 = this.requestBuilder.Version.StartsWith(
    "HTTP/1.", StringComparison.Ordinal
  );
  if(!isHttp1) {
    throw Errors.UnsupportedProtocolVersion();
  }

}
/// <summary>Parses a request header line sent from the client</summary>
/// <param name="headerLine">
///   String containing the received header line. Must not be empty, its first
///   character is inspected without a length check (ProcessBytes() handles empty
///   lines itself before calling this method).
/// </param>
/// <remarks>
///   A line beginning with a space or tab is handled as the continuation of the
///   most recent header field, any other line has to consist of a field name, a
///   colon and an optional value. Violations are reported by throwing what
///   Errors.BadRequest() returns.
/// </remarks>
private void parseHeaderLine(string headerLine) {

  // According to the RFC, a message header can be spread over multiple lines by
  // starting the follow-up lines with whitespace (SP or HT)
  char leadingCharacter = headerLine[0];
  if((leadingCharacter == ' ') || (leadingCharacter == '\t')) {

    // A continuation needs a header field it can continue
    if(this.currentFieldName == null) {
      throw Errors.BadRequest("First message header is preceded by whitespace");
    }

    parseHeaderFieldValue(headerLine, 1);
    return;

  }

  // From here on, the line has to be a regular 'name: value' header field.
  // Everything in front of the first colon is the field name.
  int colonIndex = headerLine.IndexOf(':');
  if(colonIndex == -1) {
    throw Errors.BadRequest("Message header field omits value");
  }

  // A colon at the very beginning means the field name is empty
  if(colonIndex == 0) {
    throw Errors.BadRequest("Message header contains unnamed field");
  }

  // The RFC makes no mention of whitespace being allowed between the field name
  // and the colon, so the character in front of the colon must not be SP or HT.
  char lastNameCharacter = headerLine[colonIndex - 1];
  if((lastNameCharacter == ' ') || (lastNameCharacter == '\t')) {
    throw Errors.BadRequest(
      "Message header field name is followed by whitespace"
    );
  }

  // Remember the field name (continuation lines will refer to it) and parse
  // whatever follows the colon as the field's value
  this.currentFieldName = headerLine.Substring(0, colonIndex);
  parseHeaderFieldValue(headerLine, colonIndex + 1);

}
/// <summary>Parses the field value of an HTTP header field</summary>
/// <param name="headerLine">Line containing the field value</param>
/// <param name="valueIndex">Index at which the field value begins</param>
/// <remarks>
///   The value, stripped of the whitespace (SP and HT) surrounding it, is added to
///   the request builder under the current field name. If the forward scan for a
///   non-whitespace character yields -1, the field is added without a value.
/// </remarks>
private void parseHeaderFieldValue(string headerLine, int valueIndex) {

  // Skip the whitespace in front of the value
  int valueStart = StringHelper.IndexNotOfAny(headerLine, httpWhitespaces, valueIndex);

  if(valueStart != -1) {

    // Rather than trimming (which would create another temporary string), look
    // for the end of the value by scanning backwards from the end of the line.
    // The forward scan found a character, so this scan finds one as well.
    int valueEnd = StringHelper.LastIndexNotOfAny(headerLine, httpWhitespaces);
    int valueLength = valueEnd - valueStart + 1;

    string fieldValue = headerLine.Substring(valueStart, valueLength);
    this.requestBuilder.AddHeader(this.currentFieldName, fieldValue);

  } else {

    // No value or nothing but whitespace. The RFC allows header fields that only
    // have a name, so the field is registered right away. Should the value be
    // sent in a continuation line, it will be added when that line arrives.
    this.requestBuilder.AddHeader(this.currentFieldName);

  }

}
/// <summary>
///   Characters considered as whitespace in the HTTP protocol (space and
///   horizontal tab)
/// </summary>
private static readonly char[] httpWhitespaces = new char[] { ' ', '\t' };
/// <summary>Collects data and constructs HTTP/1.1 request containers</summary>
private RequestBuilder requestBuilder;
/// <summary>Current state the parser is in</summary>
private ParserState state;
/// <summary>
///   Field name of the last request header we parsed; null while no header field
///   of the current request has been parsed yet. Continuation lines add their
///   value under this name.
/// </summary>
private string currentFieldName;
}
} // namespace Nuclex.Networking.Http