Pods/CocoaHTTPServer/Core/Mime/MultipartFormDataParser.m


#import "MultipartFormDataParser.h"
#import "DDData.h"
#import "HTTPLogging.h"

#pragma mark log level

// File-local log threshold. Both the DEBUG and the release branch currently select
// HTTP_LOG_LEVEL_WARN; the #ifdef is kept so the debug level can be changed on its own.
// NOTE(review): presumably consumed by the HTTPLog* macros from HTTPLogging.h - confirm.
#ifdef DEBUG
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#else
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#endif

// printf conversion (without the leading '%') for NSInteger-sized values.
// NSInteger is `long` on every LP64 target (x86_64 and arm64 alike) and `int` on
// 32-bit targets, so the choice is keyed on __LP64__ rather than on one architecture.
#if defined(__LP64__) && __LP64__
#define FMTNSINT "li"
#else
#define FMTNSINT "i"
#endif


//-----------------------------------------------------------------
// interface MultipartFormDataParser (private)
//-----------------------------------------------------------------


// Private helpers of the parser. They are declared here so that appendData: can use them
// before their definitions appear further down in the implementation.
@interface MultipartFormDataParser (private)
// Returns `data` decoded according to `encoding`: base64 and quoted-printable are decoded,
// any other encoding value returns `data` itself.
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding;

// Returns the offset of the first CRLFCRLF sequence at or after `offset`, or -1 when
// `workingData` does not contain one yet.
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int) offset;
// Returns the offset of the first occurrence of the boundary delimiter (boundaryData) at or
// after `offset`, or -1 when it is not present in `data`.
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset;

// Returns how many trailing bytes of the first `length` bytes of `data` should be held back
// for the given transfer encoding; 0 for encodings other than base64 and quoted-printable.
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding;
// Returns the offset just past the first CRLF found at or after `offset`, or -1 when `data`
// contains no CRLF from there on.
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data;

// Searches `workingData` for the first boundary (the delimiter without its leading CRLF).
// Returns the offset just past that boundary, or -1 when more data is needed.
- (int) processPreamble:(NSData*) workingData;

@end


//-----------------------------------------------------------------
// implementation MultipartFormDataParser
//-----------------------------------------------------------------


@implementation MultipartFormDataParser 
@synthesize delegate,formEncoding;

/**
 * Creates a parser for a multipart body delimited by `boundary`.
 *
 * @param boundary      the boundary string of the multipart message; must not be nil.
 * @param _formEncoding string encoding stored in formEncoding and handed to the part
 *                      header parser.
 * @return the initialised parser, or nil when `boundary` is nil or super's init fails.
 */
- (id) initWithBoundary:(NSString*) boundary formEncoding:(NSStringEncoding) _formEncoding {
	self = [super init];
	if( !self ) {
		return self;
	}
	if( !boundary ) {
		HTTPLogWarn(@"MultipartFormDataParser: init with zero boundary");
		return nil;
	}

	// Parser state: nothing consumed yet, no part header seen, content passed through as binary.
	formEncoding = _formEncoding;
	processedPreamble = NO;
	reachedEpilogue = NO;
	currentHeader = nil;
	currentEncoding = contentTransferEncoding_binary;
	pendingData = [[NSMutableData alloc] init];

	// Inside the body every delimiter is preceded by CRLF and two dashes.
	NSString* delimiter = [@"\r\n--" stringByAppendingString:boundary];
	boundaryData = [delimiter dataUsingEncoding:NSASCIIStringEncoding];

	return self;
}


/**
 * Feeds the next chunk of the multipart body to the parser.
 *
 * Bytes that cannot be interpreted yet (a possibly split boundary, an incomplete header, ...)
 * are kept in pendingData and are prepended to the chunk passed to the next call. The data
 * objects handed to the delegate reference the parser's buffers without copying, so the
 * delegate has to consume or copy them before returning.
 *
 * @param data the next chunk of the message body.
 * @return NO when the parser has no boundary or a part header could not be parsed,
 *         YES otherwise (including "more data needed").
 */
- (BOOL) appendData:(NSData *)data { 
	// Can't parse without boundary;
	if( nil == boundaryData ) {
		HTTPLogError(@"MultipartFormDataParser: Trying to parse multipart without specifying a valid boundary");
		assert(false);
		return NO;
	}
	NSData* workingData = data;

	if( pendingData.length ) {
		[pendingData appendData:data];
		workingData = pendingData;
	}

	// The parser keeps its position in `offset`: the index of the first unhandled byte of
	// workingData. Before returning, everything up to offset is dropped and only the
	// unhandled rest is kept for the next call.
	int offset = 0;

	// Don't parse data unless it is longer than the boundary, so that a boundary split
	// over two chunks cannot be missed.
	NSUInteger sizeToLeavePending = boundaryData.length;

	if( !reachedEpilogue && workingData.length <= sizeToLeavePending )  {
		// not enough data even to start parsing: save it to pending data.
		if( !pendingData.length ) {
			[pendingData appendData:data];
		}
		if( checkForContentEnd ) {
			if(	pendingData.length >= 2 ) {
				const char* pendingBytes = (const char*) pendingData.bytes;
				if( pendingBytes[0] == '-' && pendingBytes[1] == '-' ) {
					// we found the multipart end. all coming next is an epilogue.
					HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
					// the check is done; leaving the flag raised would repeat it on the
					// bytes that follow the closing "--".
					checkForContentEnd = NO;
					waitingForCRLF = YES;
					reachedEpilogue = YES;
					offset+= 2;
				}
				else {
					checkForContentEnd = NO;
					waitingForCRLF = YES;
					return YES;
				}
			} else {
				return YES;
			}
		}
		else {
			return YES;
		}
	}
	while( true ) {
		if( checkForContentEnd ) {
			// the flag is raised after a boundary to check whether it was the closing one ("--" follows).
			// (offset + 1 < length instead of offset < length - 1: no unsigned underflow on empty data)
			if( (NSUInteger) offset + 1 < workingData.length ) {
				const char* bytes = (const char*) workingData.bytes;
				if( bytes[offset] == '-' && bytes[offset + 1] == '-' ) {
					// we found the multipart end. all coming next is an epilogue.
					HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
					checkForContentEnd = NO;
					reachedEpilogue = YES;
					// still wait for CRLF, that comes after boundary, but before epilogue.
					waitingForCRLF = YES;
					offset += 2;
				}
				else {
					// it's not content end, we have to wait till separator line end before next part comes
					waitingForCRLF = YES;
					checkForContentEnd = NO;
				}
			}
			else {
				// we haven't got enough data to check for content end.
				// save current unhandled data (it may be 1 byte) to pending and recheck on next chunk received
				if( (NSUInteger) offset < workingData.length ) {
					[pendingData setData:[NSData dataWithBytes:(const char*) workingData.bytes + workingData.length - 1 length:1]];
				}
				else {
					// there is no unhandled data now, wait for more chunks
					[pendingData setData:[NSData data]];
				}
				return YES;
			}
		}
		if( waitingForCRLF ) {

			// the flag is raised once a boundary has been read, but the end of the
			// boundary line has not been found yet.
			int lineEnd = [self offsetTillNewlineSinceOffset:offset inData:workingData];
			if( -1 == lineEnd ) {
				// No CRLF in this chunk. Everything seen so far belongs to the boundary line and
				// can be dropped, except a trailing CR that may be the first half of a split CRLF.
				NSUInteger workingLength = workingData.length;
				const char* bytes = (const char*) workingData.bytes;
				if( workingLength > 0 && bytes[workingLength - 1] == '\r' ) {
					[pendingData setData:[NSData dataWithBytes:bytes + workingLength - 1 length:1]];
				}
				else {
					[pendingData setData:[NSData data]];
				}
				return YES;
			}
			offset = lineEnd;
			waitingForCRLF = NO;
		}
		if( !processedPreamble ) {
			// got to find the first boundary before the actual content begins.
			offset = [self processPreamble:workingData];
			// wait for more data for preamble
			if( -1 == offset ) 
				return YES;
			// invoke continue to skip newline after boundary.
			continue;
		}

		if( reachedEpilogue ) {
			// pass all epilogue data to the delegate.
			if( [delegate respondsToSelector:@selector(processEpilogueData:)] ) {
				NSUInteger epilogueLength = ( (NSUInteger) offset < workingData.length ) ? workingData.length - (NSUInteger) offset : 0;
				NSData* epilogueData = [NSData dataWithBytesNoCopy: (char*) workingData.bytes + offset length: epilogueLength freeWhenDone:NO];
				[delegate processEpilogueData: epilogueData];
			}
			// Everything received so far is handled. Without this reset the next call would
			// append to the old buffer and deliver already processed bytes as epilogue again.
			[pendingData setLength:0];
			return YES;
		}

		if( nil == currentHeader ) {
			// nil == currentHeader is a state flag, indicating we are waiting for header now.
			// whenever part is over, currentHeader is set to nil.

			// try to find CRLFCRLF bytes in the data, which indicates header end.
			// headers are not handled piecewise, as they won't be too large.
			int headerEnd = [self findHeaderEnd:workingData fromOffset:offset];
			if( -1 == headerEnd ) {
				// didn't receive the full header yet.
				if( !pendingData.length) {
					// store the unprocessed data till next chunks come
					// (pendingData is empty, so workingData is `data` here)
					[pendingData appendBytes:(const char*) data.bytes + offset length:data.length - offset];
				}
				else {
					if( offset ) {
						// save the current parse state; drop all handled data and save unhandled only.
						pendingData = [[NSMutableData alloc] initWithBytes: (char*) workingData.bytes + offset length:workingData.length - offset];
					}
				}
				return  YES;
			}
			else {

				// let the header parser do its job from now on.
				NSData * headerData = [NSData dataWithBytesNoCopy: (char*) workingData.bytes + offset length:headerEnd + 2 - offset freeWhenDone:NO];
				currentHeader = [[MultipartMessageHeader alloc] initWithData:headerData formEncoding:formEncoding];

				if( nil == currentHeader ) {
					// we've found the data is in wrong format.
					HTTPLogError(@"MultipartFormDataParser: MultipartFormDataParser: wrong input format, coulnd't get a valid header");
					return NO;
				}
				if( [delegate respondsToSelector:@selector(processStartOfPartWithHeader:)] ) {
					[delegate processStartOfPartWithHeader:currentHeader];
				}

				HTTPLogVerbose(@"MultipartFormDataParser: MultipartFormDataParser: Retrieved part header.");
			}
			// skip the two trailing \r\n, in addition to the whole header.
			offset = headerEnd + 4;	
		}
		// after we've got the header, we try to
		// find the boundary in the data.
		int contentEnd = [self findContentEnd:workingData fromOffset:offset];
		
		if( contentEnd == -1 ) {

			// No boundary found, so the data belongs to the current part. The last
			// sizeToLeavePending bytes are held back, because they may be the start of a boundary.
			// Signed arithmetic: fewer than sizeToLeavePending bytes may follow the header, and an
			// unsigned difference would wrap around to a huge length.
			char* contentBytes = (char*) workingData.bytes + offset;
			NSInteger available = (NSInteger) workingData.length - (NSInteger) offset;
			if( available < 0 ) {
				available = 0;
			}
			NSInteger sizeToPass = available - (NSInteger) sizeToLeavePending;

			if( sizeToPass > 0 ) {
				// for BASE64 or Quoted-Printable data make sure the chunk does not end inside an
				// encoded unit. The helper inspects the tail of exactly the bytes to be passed.
				NSData* candidate = [NSData dataWithBytesNoCopy:contentBytes length:(NSUInteger) sizeToPass freeWhenDone:NO];
				int leaveTrailing = [self numberOfBytesToLeavePendingWithData:candidate length:(NSUInteger) sizeToPass encoding:currentEncoding];
				sizeToPass -= leaveTrailing;
			}
			
			if( sizeToPass <= 0 ) {
				// wait for more data! Keep the unhandled rest, unless it already is exactly pendingData.
				if( offset || workingData != pendingData ) {
					[pendingData setData:[NSData dataWithBytes:contentBytes length:(NSUInteger) available]];
				}
				return YES;
			}
			// decode the chunk and let the delegate use it (store in a file, for example)
			NSData* encodedChunk = [NSData dataWithBytesNoCopy:contentBytes length:(NSUInteger) sizeToPass freeWhenDone:NO];
			NSData* decodedData = [MultipartFormDataParser decodedDataFromData:encodedChunk encoding:currentEncoding];
			
			if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
				HTTPLogVerbose(@"MultipartFormDataParser: Processed %ld bytes of body", (long) sizeToPass);

				[delegate processContent: decodedData WithHeader:currentHeader];
			}

			// store everything that was not passed on (the held back tail) till the next chunks come.
			[pendingData setData:[NSData dataWithBytes:contentBytes + sizeToPass length:(NSUInteger) (available - sizeToPass)]];
			return YES;
		}
		else {

			// Here we found the boundary.
			// let the delegate process the rest of the part, and continue with the next parts.
			if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
				NSData* lastChunk = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + offset length:contentEnd - offset freeWhenDone:NO];
				// decode the last chunk the same way as the chunks before it (a no-op for binary content).
				NSData* decodedData = [MultipartFormDataParser decodedDataFromData:lastChunk encoding:currentEncoding];
				[delegate processContent:decodedData WithHeader:currentHeader];
			}

			if( [delegate respondsToSelector:@selector(processEndOfPartWithHeader:)] ){
				[delegate processEndOfPartWithHeader:currentHeader];
				HTTPLogVerbose(@"MultipartFormDataParser: End of body part");
			}
			currentHeader = nil;

			// set up offset to continue with the remaining data (if any)
			offset = contentEnd + (int)boundaryData.length;
			// raising the flag makes the next iteration check for the closing "--"
			// and then skip everything up to the CRLF that ends the boundary line.
			checkForContentEnd = YES;
		}
	}
	return YES;
}


//-----------------------------------------------------------------
#pragma mark private methods

/**
 * Finds the CRLF that ends the boundary line. According to rfc2046 the boundary may be
 * followed by any amount of whitespace before the CRLF.
 *
 * @param offset index in `data` at which to start looking.
 * @param data   the bytes to search.
 * @return the offset just past the first CRLF at or after `offset`, or -1 when `data`
 *         holds no complete CRLF from there on.
 */
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data {
	const char* bytes = (const char*) data.bytes;
	NSUInteger length = data.length;

	// A CRLF takes two bytes, so the last position worth testing is length - 2.
	// Checking length first keeps the unsigned subtraction from wrapping on short data.
	if( offset < 0 || length < 2 || (NSUInteger) offset > length - 2 )
		return -1;

	NSUInteger position = (NSUInteger) offset;
	// bytes are compared one by one: no unaligned 16-bit load, no dependency on byte order.
	while( bytes[position] != '\r' || bytes[position + 1] != '\n' ) {

		// in debug, we might also want to know, if the file is somehow misformatted.
#ifdef DEBUG
		if( !isspace((unsigned char) bytes[position]) ) {
			HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[position] );
		}
		if( !isspace((unsigned char) bytes[position + 1]) ) {
			HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[position + 1] );
		}
#endif
		position++;
		if( position > length - 2 ) {
			// no endl found within current data
			return -1;
		}
	}

	return (int) (position + 2);
}


/**
 * Looks for the first boundary of the message and hands everything in front of it to the
 * delegate as preamble data.
 *
 * When the boundary is found, processedPreamble and waitingForCRLF are raised. When it is not
 * found, all bytes except the last boundary-length bytes are reported as preamble and that
 * tail is kept in pendingData, since it may contain the beginning of the boundary.
 *
 * @param data the bytes received so far that have not been handled yet.
 * @return the offset just past the first boundary, or -1 when more data is needed.
 */
- (int) processPreamble:(NSData*) data {
	if( boundaryData.length <= 2 ) {
		// no usable boundary; nothing can be found.
		return -1;
	}
	// the first boundary won't have CRLF preceding.
	NSData* firstBoundary = [boundaryData subdataWithRange:NSMakeRange(2, boundaryData.length - 2)];
	NSUInteger boundaryLength = firstBoundary.length;
	NSUInteger dataLength = data.length;
	char* dataBytes = (char*) data.bytes;

	// find the boundary without leading CRLF.
	NSRange boundaryRange = NSMakeRange(NSNotFound, 0);
	if( dataLength >= boundaryLength ) {
		boundaryRange = [data rangeOfData:firstBoundary options:0 range:NSMakeRange(0, dataLength)];
	}

	if( NSNotFound == boundaryRange.location ) {
		// the end of preamble wasn't found in this chunk
		if( dataLength > boundaryLength ) {
			NSUInteger sizeToProcess = dataLength - boundaryLength;
			if( [delegate respondsToSelector:@selector(processPreambleData:)] ) {
				NSData* preambleData = [NSData dataWithBytesNoCopy:dataBytes length:sizeToProcess freeWhenDone:NO];
				[delegate processPreambleData:preambleData];
				HTTPLogVerbose(@"MultipartFormDataParser: processed preamble");
			}
			pendingData = [NSMutableData dataWithBytes:dataBytes + sizeToProcess length:boundaryLength];
		}
		else if( dataLength && data != pendingData ) {
			// too short to report anything; keep all of it for the next chunk.
			[pendingData setData:data];
		}
		return -1;
	}

	NSUInteger offset = boundaryRange.location;
	if ( offset && [delegate respondsToSelector:@selector(processPreambleData:)] ) {
		NSData* preambleData = [NSData dataWithBytesNoCopy:dataBytes length:offset freeWhenDone:NO];
		[delegate processPreambleData:preambleData];
	}
	// tells to skip CRLF after the boundary.
	processedPreamble = YES;
	waitingForCRLF = YES;
	return (int) (offset + boundaryLength);
}


/**
 * Searches for the empty line (CRLF CRLF) that terminates a part header.
 *
 * @param workingData the bytes to search.
 * @param offset      index at which to start looking.
 * @return the offset of the first CRLFCRLF at or after `offset`, or -1 when the data does
 *         not contain a complete one yet.
 */
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int)offset {
	const char* bytes = (const char*) workingData.bytes;
	NSUInteger inputLength = workingData.length;

	if( offset < 0 ) {
		return -1;
	}
	// four bytes are inspected per position, so stop while four bytes are still available.
	for( NSUInteger position = (NSUInteger) offset; position + 4 <= inputLength; position++ ) {
		if( bytes[position] == '\r' && bytes[position + 1] == '\n' &&
		    bytes[position + 2] == '\r' && bytes[position + 3] == '\n' ) {
			return (int) position;
		}
	}
	// wait for more data
	return -1;
}


/**
 * Searches for the boundary delimiter (boundaryData, i.e. CRLF "--" boundary) that ends the
 * content of the current part.
 *
 * @param data   the bytes to search.
 * @param offset index at which to start looking.
 * @return the offset of the first delimiter at or after `offset`, or -1 when `data` does not
 *         contain a complete delimiter from there on.
 */
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset {
	NSUInteger boundaryLength = boundaryData.length;
	NSUInteger dataLength = data.length;

	// Reject the cases in which no full delimiter can fit; this also keeps the
	// unsigned subtraction below from wrapping around.
	if( offset < 0 || 0 == boundaryLength || dataLength < boundaryLength ||
	    (NSUInteger) offset > dataLength - boundaryLength ) {
		return -1;
	}

	NSRange searchRange = NSMakeRange((NSUInteger) offset, dataLength - (NSUInteger) offset);
	NSRange delimiterRange = [data rangeOfData:boundaryData options:0 range:searchRange];
	if( NSNotFound == delimiterRange.location ) {
		return -1;
	}
	return (int) delimiterRange.location;
}


/**
 * Tells how many trailing bytes of a chunk must be held back so that the chunk does not end
 * in the middle of an encoded unit.
 *
 * @param data     the bytes about to be passed on; only the first `length` bytes are looked at.
 * @param length   number of bytes of `data` that are candidates for passing on.
 * @param encoding content transfer encoding of the part.
 * @return base64: the number of bytes after the last complete group of four characters on the
 *         last line; quoted-printable: the length of an unfinished "=XX" escape at the end
 *         (everything if there are at most 2 bytes); any other encoding: 0.
 */
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding {
	const char* bytes = (const char*) data.bytes;

	// never look past the bytes that are actually there.
	if( length > data.length ) {
		length = data.length;
	}

	if( encoding == contentTransferEncoding_base64 ) {
		// find the start of the last line by scanning backwards for the last CRLF.
		NSUInteger lineStart = 0;
		for( NSUInteger end = length; end >= 2; end-- ) {
			if( bytes[end - 2] == '\r' && bytes[end - 1] == '\n' ) {
				lineStart = end;
				break;
			}
		}
		// base64 is decoded in groups of four characters, so the length of the passed data
		// since the last line start has to be a multiple of 4; the remainder stays pending.
		return (int) ((length - lineStart) % 4);
	}

	if( encoding == contentTransferEncoding_quotedPrintable ) {
		// anything shorter than a full "=XX" escape is not passed on.
		if( length <= 2 )
			return (int) length;
		// check whether the last bytes are the start of an encoded symbol.
		if( bytes[length - 2] == '=' )
			return 2;
		if( bytes[length - 1] == '=' )
			return 1;
		return 0;
	}
	return 0;
}


//-----------------------------------------------------------------
#pragma mark decoding


/**
 * Decodes a chunk of part content according to its content transfer encoding.
 *
 * @param data     the encoded bytes.
 * @param encoding one of the contentTransferEncoding_* values.
 * @return the base64 or quoted-printable decoded bytes; for every other encoding `data`
 *         itself is returned.
 */
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding {
	if( contentTransferEncoding_base64 == encoding ) {
		return [data base64Decoded];
	}
	if( contentTransferEncoding_quotedPrintable == encoding ) {
		return [self decodedDataFromQuotedPrintableData:data];
	}
	// anything else is passed through untouched.
	return data;
}


// Returns the value (0-15) of a hexadecimal digit, or -1 if `character` is not one.
// Lower case digits are accepted as well, although rfc2045 asks encoders for upper case.
static int MultipartFormDataParserHexValue(unsigned char character) {
	if( character >= '0' && character <= '9' ) return character - '0';
	if( character >= 'A' && character <= 'F' ) return character - 'A' + 10;
	if( character >= 'a' && character <= 'f' ) return character - 'a' + 10;
	return -1;
}

/**
 * Decodes Quoted-Printable data (http://tools.ietf.org/html/rfc2045#section-6.7).
 *
 * "=XX" is replaced by the byte with hexadecimal value XX, and a soft line break ("=" followed
 * by CR and an optional LF) is removed. An "=" that is followed by neither is copied to the
 * output unchanged, and so is every other byte.
 *
 * @param data the quoted-printable encoded bytes.
 * @return the decoded bytes.
 */
+ (NSData*) decodedDataFromQuotedPrintableData:(NSData *)data {
	const unsigned char* bytes = (const unsigned char*) data.bytes;
	NSUInteger length = data.length;
	NSMutableData* result = [NSMutableData dataWithCapacity:length];

	NSUInteger runStart = 0;	// start of the literal bytes not copied to result yet
	NSUInteger position = 0;
	while( position < length ) {
		if( bytes[position] != '=' ) {
#ifdef DEBUG
			if( bytes[position] > 126 ) {
				HTTPLogWarn(@"MultipartFormDataParser: Warning, character with code above 126 appears in quoted printable encoded data");
			}
#endif
			position++;
			continue;
		}

		// copy the literal bytes in front of the '='
		if( position > runStart ) {
			[result appendBytes:bytes + runStart length:position - runStart];
		}
		NSUInteger remaining = length - position - 1;	// bytes following the '='

		// soft newline
		if( remaining >= 1 && bytes[position + 1] == '\r' ) {
			position += 2;
			if( position < length && bytes[position] == '\n' ) {
				position++;
			}
			runStart = position;
			continue;
		}

		int high = ( remaining >= 2 ) ? MultipartFormDataParserHexValue(bytes[position + 1]) : -1;
		int low  = ( remaining >= 2 ) ? MultipartFormDataParserHexValue(bytes[position + 2]) : -1;
		if( high >= 0 && low >= 0 ) {
			unsigned char decodedByte = (unsigned char) ((high << 4) | low);
			[result appendBytes:&decodedByte length:1];
			position += 3;
			runStart = position;
			continue;
		}

		if( remaining < 2 ) {
			HTTPLogWarn(@"MultipartFormDataParser: warning, trailing '=' in quoted printable data");
		}
		// not a valid escape: keep the '=' as literal data and go on with the byte after it.
		runStart = position;
		position++;
	}

	// copy the literal bytes after the last escape
	if( length > runStart ) {
		[result appendBytes:bytes + runStart length:length - runStart];
	}
	return result;
}


@end