#region CPL License
/*
Nuclex Framework
Copyright (C) 2002-2009 Nuclex Development Labs
This library is free software; you can redistribute it and/or
modify it under the terms of the IBM Common Public License as
published by the IBM Corporation; either version 1.0 of the
License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
IBM Common Public License for more details.
You should have received a copy of the IBM Common Public
License along with this library
*/
#endregion
using System;
using System.Collections.Generic;
using System.Text;
using Nuclex.Support;
namespace Nuclex.Networking {
/// <summary>Parses lines from binary data</summary>
/// <remarks>
///   <para>
///     To use this parser, derive a class from it to set up your own filtering rules
///     for invalid characters and text encoding format.
///   </para>
///   <para>
///     Next, create an instance of your new parser and feed it a chunk of data (with
///     arbitrary length, either containing the complete message or only a fraction of
///     it) using the <see cref="SetReceivedData" /> method. Then let it chop the chunk
///     down into lines by calling the <see cref="ParseLine" /> method repeatedly until
///     it returns null (meaning it requires more data to continue) or throws
///     an exception.
///   </para>
///   <para>
///     If you're implementing a combined text/binary protocol like HTTP you can also,
///     at any time, call <see cref="GetRemainingData" /> to obtain any data that has
///     not been parsed yet, for example to obtain the beginning of the binary data
///     that followed the textual header of a request.
///   </para>
///   <para>
///     You can reuse the same parser for multiple requests by calling its
///     <see cref="Reset" /> method, which will restore it to the state it was in when
///     it had just been created.
///   </para>
///   <para>
///     You will find several references to "the RFC" in the comments within the code.
///     This is because this parser, while generic in purpose, relies on the RFC for
///     the HTTP protocol (RFC-2616) for any decisions on how to proceed. One of this
///     class' design goals is to be usable as the foundation for an HTTP protocol
///     parser.
///   </para>
/// </remarks>
public abstract class LineParser {

  /// <summary>ASCII code for the carriage return character</summary>
  private const byte CR = 13;
  /// <summary>ASCII code for the line feed character</summary>
  private const byte LF = 10;

  /// <summary>Shared empty buffer handed out when no data has been assigned</summary>
  private static readonly byte[] noBytes = new byte[0];

  /// <summary>Initializes a new line parser limited to messages of 1024 bytes</summary>
  public LineParser() : this(1024) { }

  /// <summary>Initializes a new line parser</summary>
  /// <param name="maximumMessageSize">
  ///   Maximum size the entire message is allowed to have in bytes
  /// </param>
  public LineParser(int maximumMessageSize) {
    this.maximumMessageSize = maximumMessageSize;
    this.storedBytes = new byte[64];
  }

  /// <summary>Assigns a new chunk of received data for parsing</summary>
  /// <param name="bytes">Array containing the bytes that will be parsed</param>
  /// <param name="start">Index in the array at which to begin parsing</param>
  /// <param name="count">Number of bytes to parse</param>
  /// <remarks>
  ///   This method has to be called before the <see cref="ParseLine" /> method
  ///   can be used. A null array is accepted only together with a byte count of zero.
  /// </remarks>
  /// <exception cref="ArgumentOutOfRangeException">
  ///   If <paramref name="start" /> or <paramref name="count" /> is negative
  /// </exception>
  /// <exception cref="ArgumentNullException">
  ///   If <paramref name="bytes" /> is null but <paramref name="count" /> is not zero
  /// </exception>
  /// <exception cref="ArgumentException">
  ///   If the specified range does not lie within <paramref name="bytes" />
  /// </exception>
  public void SetReceivedData(byte[] bytes, int start, int count) {

    // Reject invalid ranges right here instead of letting them surface later as
    // an unrelated exception from somewhere deep inside the parsing code
    if(start < 0) {
      throw new ArgumentOutOfRangeException("start", "Start index must not be negative");
    }
    if(count < 0) {
      throw new ArgumentOutOfRangeException("count", "Byte count must not be negative");
    }
    if(bytes == null) {
      if(count > 0) {
        throw new ArgumentNullException("bytes");
      }
    } else if((start > bytes.Length) || (count > (bytes.Length - start))) {
      throw new ArgumentException(
        "Start index and byte count do not denote a valid range in the array"
      );
    }

    this.receivedBytes = bytes;
    this.receivedByteIndex = start;
    this.receivedByteCount = count;

  }

  /// <summary>
  ///   Returns the remaining (still unparsed) data in the received buffer
  /// </summary>
  /// <returns>The remaining data from the receive buffer</returns>
  /// <remarks>
  ///   Calling this method consumes the remaining data: the parser will afterwards
  ///   report that it requires more data. If no data has been assigned to the parser,
  ///   an empty segment is returned.
  /// </remarks>
  public ArraySegment<byte> GetRemainingData() {

    // Without any assigned buffer there is nothing to hand out. Constructing an
    // ArraySegment on a null array would throw, so use an empty buffer instead.
    if(this.receivedBytes == null) {
      this.receivedByteCount = 0;
      return new ArraySegment<byte>(noBytes, 0, 0);
    }

    ArraySegment<byte> remainingData = new ArraySegment<byte>(
      this.receivedBytes, this.receivedByteIndex, this.receivedByteCount
    );

    // The caller now owns the remaining bytes, nothing is left for us to parse
    this.receivedByteCount = 0;

    return remainingData;

  }

  /// <summary>Resets the parser to its initial state</summary>
  public void Reset() {
    this.accumulatedRequestSize = 0;
    this.storedByteCount = 0;
    this.receivedByteCount = 0;
    this.storedBytesEndWithCR = false;
  }

  /// <summary>Attempts to parse a complete line from the received data</summary>
  /// <returns>The complete line or null if more data is required</returns>
  /// <remarks>
  ///   <para>
  ///     Before calling this method, you have to assign the data to be parsed using
  ///     the <see cref="SetReceivedData" /> method. The idea is to call
  ///     <see cref="SetReceivedData" /> once and then keep calling this method until
  ///     it returns null (meaning it ran out of data), at which point you can call
  ///     <see cref="SetReceivedData" /> and continue parsing lines or call
  ///     <see cref="GetRemainingData" /> instead to retrieve the still unparsed
  ///     bytes following the most recently parsed line.
  ///   </para>
  ///   <para>
  ///     If a carriage return is not followed by a line feed,
  ///     <see cref="HandleLoneCarriageReturn" /> is invoked. Should that method
  ///     return without throwing, the carriage return alone is treated as the line
  ///     terminator and the byte following it remains available for parsing.
  ///   </para>
  /// </remarks>
  public string ParseLine() {

    // If we ran out of bytes, tell this to the caller by returning null
    if(this.receivedByteCount == 0) {
      return null;
    }

    // The common case: no line break is pending from the previous chunk
    if(!this.storedBytesEndWithCR) {
      return internalScanForLineEnding();
    }

    // The data we parsed in the last run ended with a CR, so this might be a line
    // break that got split in two packets. Find out whether a line feed follows.
    bool isLineFeed = (this.receivedBytes[this.receivedByteIndex] == LF);

    // Only a line feed is part of the line terminator. Any other byte already
    // belongs to whatever follows the lone CR and must not be swallowed. We don't
    // have to care about the request length here because the request will be
    // rejected when it's clear that it cannot complete before exceeding the limit.
    if(isLineFeed) {
      ++this.accumulatedRequestSize;
      ++this.receivedByteIndex;
      --this.receivedByteCount;
    }

    // Ensure the store buffer is cleared, whatever happens
    try {

      // A CR without a succeeding LF is not allowed according to the RFC
      if(!isLineFeed) {
        HandleLoneCarriageReturn();
      }

      // Transform the stored bytes (minus the trailing CR) into a string. No
      // verification needed since everything in the store buffer has been
      // verified before it was put in there.
      return TransformToString(this.storedBytes, 0, this.storedByteCount - 1);

    }
    finally {
      this.storedBytesEndWithCR = false;
      this.storedByteCount = 0;
    }

  }

  /// <summary>
  ///   Called when the message is growing beyond the maximum message size
  /// </summary>
  /// <returns>
  ///   An exception that will be thrown to indicate the too large message
  /// </returns>
  protected abstract Exception HandleMessageTooLarge();

  /// <summary>
  ///   Called when the message contains a carriage return without a line feed
  /// </summary>
  /// <remarks>
  ///   It is safe to throw an exception here. The exception will travel up in
  ///   the call stack to the caller of the <see cref="ParseLine" /> method.
  /// </remarks>
  protected abstract void HandleLoneCarriageReturn();

  /// <summary>
  ///   Called to scan the bytes of a potential line for invalid characters
  /// </summary>
  /// <param name="buffer">
  ///   Array containing the bytes to scan for invalid characters
  /// </param>
  /// <param name="start">Index in the array at which to begin reading</param>
  /// <param name="count">Number of bytes from the array to scan</param>
  /// <remarks>
  ///   <para>
  ///     This method is used to check for invalid characters even before a complete
  ///     line has been received. It will be called with incomplete lines (for example,
  ///     when the received data ends before a CR LF is encountered) to allow for early
  ///     rejection of data containing characters not allowed by a protocol.
  ///   </para>
  ///   <para>
  ///     It is safe to throw an exception here. The exception will travel up in
  ///     the call stack to the caller of the <see cref="ParseLine" /> method.
  ///   </para>
  /// </remarks>
  protected abstract void VerifyPotentialLine(byte[] buffer, int start, int count);

  /// <summary>
  ///   Called to transform a received series of bytes into a string
  /// </summary>
  /// <param name="buffer">Buffer containing the bytes to be transformed</param>
  /// <param name="start">Index of the first byte to transform</param>
  /// <param name="count">Number of bytes to transform into a string</param>
  /// <returns>The string produced from the bytes in the specified buffer</returns>
  /// <remarks>
  ///   This method allows you to use your own encoding for transforming the bytes
  ///   in a line into a string. Always called to transform an entire line in one
  ///   piece, excluding the CR LF characters at the line's end.
  /// </remarks>
  protected abstract string TransformToString(byte[] buffer, int start, int count);

  /// <summary>Internal method that scans the received data for a line ending</summary>
  /// <returns>
  ///   The line if enough data for at least one complete line was available,
  ///   null if more data is required
  /// </returns>
  private string internalScanForLineEnding() {

    // Find out how many bytes we can access before exceeding the maximum message
    // size defined by the user. The result is clamped to zero so a limit that has
    // already been used up (or a nonsensical negative limit) is reported through
    // HandleMessageTooLarge() instead of crashing inside Array.IndexOf().
    int remainingAllowance = this.maximumMessageSize - this.accumulatedRequestSize;
    int safeByteCount = Math.Max(
      0, Math.Min(this.receivedByteCount, remainingAllowance)
    );

    // Look for the next carriage return in the stream
    int crIndex = Array.IndexOf(
      this.receivedBytes, CR, this.receivedByteIndex, safeByteCount
    );

    // Line terminator found? Then only three outcomes are possible: We either run
    // out of data, obtain a complete line or discover that the CR is not followed
    // by an LF, meaning the line is invalid.
    if(crIndex != -1) {
      return internalParsePotentialLine(crIndex);
    }

    // No line terminator found, so all received data belongs to a single line.
    // First scan the received data for invalid characters. If the message is too
    // large, we still do this because we want to mimic a byte-by-byte parser that
    // would encounter the invalid character before noticing the excessive size.
    VerifyPotentialLine(this.receivedBytes, this.receivedByteIndex, safeByteCount);

    // We know there's no CR in the scanned data. If the data comes to within 1
    // byte of the maximum message size, a valid line is not possible anymore.
    // This also catches the case when safeByteCount was capped.
    int totalSize = this.accumulatedRequestSize + safeByteCount;
    if(totalSize >= (this.maximumMessageSize - 1)) {
      throw HandleMessageTooLarge();
    }

    // Remember all remaining bytes in the store buffer so we can use them later
    // when the line will (hopefully) be terminated. Taking over the entire receive
    // buffer is fine because a too large message would have been caught above.
    internalTakeOverReceiveBuffer();

    return null;

  }

  /// <summary>Parses a potential line for the final LF character</summary>
  /// <param name="crIndex">Index of the CR character in the received data</param>
  /// <returns>
  ///   A string containing the parsed line or null if more data is required
  /// </returns>
  private string internalParsePotentialLine(int crIndex) {

    // Number of bytes between the start of the current line and the CR
    int lineLength = crIndex - this.receivedByteIndex;

    // Make sure the received bytes are valid characters. As before, we try to mimic
    // a byte-by-byte parser, so we will check for this first before handling
    // potential errors at the CR we just discovered.
    VerifyPotentialLine(this.receivedBytes, this.receivedByteIndex, lineLength);

    // The line still needs room for its CR LF terminator. If it comes to within
    // one byte of the allowed size, it cannot be completed anymore.
    int totalSize = this.accumulatedRequestSize + lineLength;
    if(totalSize >= (this.maximumMessageSize - 1)) {
      throw HandleMessageTooLarge();
    }

    // If the CR was the final byte we were provided with, we have to stop here,
    // remember what we got and tell the caller to give us more data.
    bool oneMoreByteAvailable = ((lineLength + 1) < this.receivedByteCount);
    if(!oneMoreByteAvailable) {
      internalTakeOverReceiveBuffer();
      this.storedBytesEndWithCR = true;
      return null;
    }

    // Find out whether the character that follows is a line feed. If it is not,
    // only the CR is consumed as line terminator so that the byte following it is
    // not lost in case the lone CR is tolerated by the derived class.
    bool isLineFeed = (this.receivedBytes[crIndex + 1] == LF);
    int consumedBytes = lineLength + (isLineFeed ? 2 : 1);

    // Make sure the buffer pointers are updated whatever happens next.
    try {

      // A CR without a succeeding LF is not allowed according to the RFC
      if(!isLineFeed) {
        HandleLoneCarriageReturn();
      }

      // Optimization: If the complete line is in the receive buffer, we do not
      // need to waste time copying data to the store buffer
      if(this.storedByteCount == 0) {
        return TransformToString(
          this.receivedBytes, this.receivedByteIndex, lineLength
        );
      }

      // The line is split between store buffer and receive buffer, so append
      // the newly received part to what has been stored already
      ensureAdditionalStoreCapacity(lineLength);
      Array.Copy(
        this.receivedBytes,
        this.receivedByteIndex,
        this.storedBytes,
        this.storedByteCount,
        lineLength
      );

      return TransformToString(
        this.storedBytes, 0, this.storedByteCount + lineLength
      );

    }
    finally {
      this.storedByteCount = 0;
      this.receivedByteIndex += consumedBytes;
      this.receivedByteCount -= consumedBytes;
      this.accumulatedRequestSize += consumedBytes;
    }

  }

  /// <summary>Takes over the current receive buffer into the store buffer</summary>
  private void internalTakeOverReceiveBuffer() {
    int takenByteCount = this.receivedByteCount;

    // Make sure the store buffer is large enough to take the received data
    ensureAdditionalStoreCapacity(takenByteCount);

    // Append all received data to the store buffer
    Array.Copy(
      this.receivedBytes,
      this.receivedByteIndex,
      this.storedBytes,
      this.storedByteCount,
      takenByteCount
    );

    // Do the bookkeeping only after the copy has succeeded, otherwise the parser
    // would claim to have stored bytes that never made it into the store buffer
    this.accumulatedRequestSize += takenByteCount;
    this.storedByteCount += takenByteCount;
    this.receivedByteCount = 0;
  }

  /// <summary>
  ///   Makes sure that the store buffer has enough capacity to fit the specified
  ///   amount of additional bytes in it
  /// </summary>
  /// <param name="additionalSize">Number of required additional bytes</param>
  private void ensureAdditionalStoreCapacity(int additionalSize) {
    int requiredSize = this.storedByteCount + additionalSize;

    // See whether we need to do anything at all
    if(requiredSize <= this.storedBytes.Length) {
      return;
    }

    // Resize the store buffer to the next highest power of 2, carrying over
    // the bytes that have already been stored
    byte[] newStoredBytes = new byte[IntegerHelper.NextPowerOf2(requiredSize)];
    Array.Copy(this.storedBytes, newStoredBytes, this.storedByteCount);
    this.storedBytes = newStoredBytes;
  }

  /// <summary>Maximum size the message is allowed to reach</summary>
  private readonly int maximumMessageSize;
  /// <summary>Buffer containing the received bytes while they're processed</summary>
  private byte[] receivedBytes;
  /// <summary>Current index in the received bytes the parser is working at</summary>
  private int receivedByteIndex;
  /// <summary>Number of received bytes left to process</summary>
  private int receivedByteCount;
  /// <summary>
  ///   Stores received data if it needs to be remembered between two parse runs
  /// </summary>
  private byte[] storedBytes;
  /// <summary>Number of bytes in the temporary store buffer</summary>
  private int storedByteCount;
  /// <summary>Whether the final byte in the temporary store buffer is a CR</summary>
  private bool storedBytesEndWithCR;
  /// <summary>Total size of the message parsed so far</summary>
  private int accumulatedRequestSize;

}
} // namespace Nuclex.Networking