diff --git a/MagicBytesValidator.Tests/Models/FileByteFilterMatches.cs b/MagicBytesValidator.Tests/Models/FileByteFilterMatches.cs index e89b12f..932d42f 100644 --- a/MagicBytesValidator.Tests/Models/FileByteFilterMatches.cs +++ b/MagicBytesValidator.Tests/Models/FileByteFilterMatches.cs @@ -22,6 +22,69 @@ public void Should_not_match_pdf() Assert.False(pdf.Matches(pdfTestData)); } + [Fact] + public void Should_match_pdf_with_trailing_bytes_in_default_mode() + { + var pdf = new Pdf(); + + var pdfTestData = "%PDF-\n%%EOF\nTRAILING"u8.ToArray(); + + Assert.True(pdf.Matches(pdfTestData, FileByteType.Lazy)); + } + + [Fact] + public void Should_not_match_pdf_with_trailing_bytes_in_strict_mode() + { + var pdf = new Pdf(); + + var pdfTestData = "%PDF-\n%%EOF\nTRAILING"u8.ToArray(); + + Assert.False(pdf.Matches(pdfTestData, FileByteType.Strict)); + } + + [Fact] + public void Should_not_match_pdf_when_eof_is_not_within_last_1024_bytes_in_default_mode() + { + var pdf = new Pdf(); + + var prefix = "%PDF-\n"u8.ToArray(); + var eof = "%%EOF\n"u8.ToArray(); + + // put EOF early, then add >1024 bytes afterwards so it falls outside the last 1024 bytes + var trailing = new byte[1100]; + for (var i = 0; i < trailing.Length; i++) + { + trailing[i] = (byte)'A'; + } + + var pdfTestData = prefix + .Concat(eof) + .Concat(trailing) + .ToArray(); + + Assert.False(pdf.Matches(pdfTestData, FileByteType.Lazy)); + } + + [Fact] + public void Should_match_pdf_when_eof_marker_is_present_in_default_mode() + { + var pdf = new Pdf(); + + var pdfTestData = "%PDF-\n...%%EOF...TAIL"u8.ToArray(); + + Assert.True(pdf.Matches(pdfTestData, FileByteType.Lazy)); + } + + [Fact] + public void Should_match_pdf_in_strict_mode_when_eof_is_at_end() + { + var pdf = new Pdf(); + + var pdfTestData = "%PDF-\n%%EOF"u8.ToArray(); + + Assert.True(pdf.Matches(pdfTestData, FileByteType.Strict)); + } + [Fact] public void Should_match_ppt() { @@ -144,4 +207,4 @@ public void Should_match_heic() 
Assert.True(heic.Matches(testStream)); } -} \ No newline at end of file +} diff --git a/MagicBytesValidator.Tests/ValidatorIsValidAsync.cs b/MagicBytesValidator.Tests/ValidatorIsValidAsync.cs index 6403787..cf00f00 100644 --- a/MagicBytesValidator.Tests/ValidatorIsValidAsync.cs +++ b/MagicBytesValidator.Tests/ValidatorIsValidAsync.cs @@ -72,4 +72,4 @@ public async Task Should_fail_incorrect_magicByte_sequence() // Assert Assert.False(invalidMagicByte); } -} \ No newline at end of file +} diff --git a/MagicBytesValidator/Formats/Pdf.cs b/MagicBytesValidator/Formats/Pdf.cs index 33f0276..c00e8a5 100644 --- a/MagicBytesValidator/Formats/Pdf.cs +++ b/MagicBytesValidator/Formats/Pdf.cs @@ -4,21 +4,28 @@ namespace MagicBytesValidator.Formats; /// public class Pdf : FileByteFilter { - public Pdf() : base( - ["application/pdf"], - ["pdf"] - ) - { - StartsWith([0x25, 0x50, 0x44, 0x46, 0x2D]) - .EndsWithAnyOf( - [ - [0x25, 0x25, 0x45, 0x4F, 0x46], - [0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0A], - [0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0A, 0x20], - [0x0D, 0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0D, 0x0A], - [0x0D, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0D], /* looks strange, but garykessler says so. 
*/ - [0x0D, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0A], - [0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0D, 0x0A], - ]); - } -} \ No newline at end of file + public Pdf() : base( + ["application/pdf"], + ["pdf"] + ) + { + StartsWith([0x25, 0x50, 0x44, 0x46, 0x2D]); // %PDF- + + EndsWithAnyOf( + [ + [0x25, 0x25, 0x45, 0x4F, 0x46], + [0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0A], + [0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0A, 0x20], + [0x0D, 0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0D, 0x0A], + [0x0D, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0D], // garykessler variant + [0x0D, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0A], + [0x0A, 0x25, 0x25, 0x45, 0x4F, 0x46, 0x0D, 0x0A] + ], FileByteType.Strict); + + TailContains( + 1024, + [0x25, 0x25, 0x45, 0x4F, 0x46], // %%EOF + FileByteType.Lazy + ); + } +} diff --git a/MagicBytesValidator/MagicBytesValidator.csproj b/MagicBytesValidator/MagicBytesValidator.csproj index ca22296..d3490f9 100644 --- a/MagicBytesValidator/MagicBytesValidator.csproj +++ b/MagicBytesValidator/MagicBytesValidator.csproj @@ -4,7 +4,7 @@ MagicBytesValidator traperto GmbH Validate files based on mime types, file extensions and magic byte sequences. 
- 2.1.6 + 2.2.0 Members of traperto GmbH https://github.com/Traperto/magic-bytes-validator/blob/main/README.md mime mimetype mimetypes magic magicbyte magicbytes extension extensions file @@ -17,7 +17,7 @@ traperto GmbH MIT - + true diff --git a/MagicBytesValidator/Models/FileByteFilter.cs b/MagicBytesValidator/Models/FileByteFilter.cs index 3af05cb..f935bb0 100644 --- a/MagicBytesValidator/Models/FileByteFilter.cs +++ b/MagicBytesValidator/Models/FileByteFilter.cs @@ -7,11 +7,26 @@ public abstract class FileByteFilter : IFileType private readonly List _neededByteChecks = []; private readonly List _oneOfEachByteChecks = []; private readonly List _anywhereByteChecks = []; + private readonly List _tailContainsChecks = []; + + private readonly List _neededByteChecksStrict = []; + private readonly List _oneOfEachByteChecksStrict = []; + private readonly List _anywhereByteChecksStrict = []; + private readonly List _tailContainsChecksStrict = []; + + private readonly List _neededByteChecksDefault = []; + private readonly List _oneOfEachByteChecksDefault = []; + private readonly List _anywhereByteChecksDefault = []; + private readonly List _tailContainsChecksDefault = []; + + private sealed record TailContainsCheck(int LastNBytes, byte?[] Pattern); + + private readonly FileByteType _defaultType; public string[] MimeTypes { get; } public string[] Extensions { get; } - public FileByteFilter(string[] mimeTypes, string[] extensions) + protected FileByteFilter(string[] mimeTypes, string[] extensions, FileByteType type = FileByteType.Strict) { if (!mimeTypes.Any() || mimeTypes.Any(string.IsNullOrEmpty)) { @@ -25,6 +40,7 @@ public FileByteFilter(string[] mimeTypes, string[] extensions) MimeTypes = mimeTypes; Extensions = extensions; + _defaultType = type; } public class ByteCheck(int offset, byte?[] bytesToCheck) @@ -33,12 +49,14 @@ public class ByteCheck(int offset, byte?[] bytesToCheck) public readonly byte?[] ByteArray = bytesToCheck; } - public bool Matches(byte[] 
fileByteStream) + public bool Matches(byte[] fileByteStream, FileByteType type = FileByteType.Lazy) { foreach (var neededByteCheck in _neededByteChecks) { if (!CheckBytes(neededByteCheck, fileByteStream)) + { return false; + } } foreach (var oneOf in _oneOfEachByteChecks) @@ -49,11 +67,10 @@ public bool Matches(byte[] fileByteStream) } } - // Then check byteArrays without fixed offsets - // mainly byteArrays from Anywhere() foreach (var byteCheckWithoutOffset in _anywhereByteChecks) { var found = false; + for (var index = 1; index <= fileByteStream.Length; index++) { if (byteCheckWithoutOffset.Cast() @@ -70,58 +87,228 @@ public bool Matches(byte[] fileByteStream) } } + foreach (var tc in _tailContainsChecks) + { + if (!CheckTailContains(tc, fileByteStream)) + { + return false; + } + } + + if (type == FileByteType.Strict) + { + foreach (var neededByteCheck in _neededByteChecksStrict) + { + if (!CheckBytes(neededByteCheck, fileByteStream)) + { + return false; + } + } + + foreach (var oneOf in _oneOfEachByteChecksStrict) + { + if (!oneOf.Any(byteToCheck => CheckBytes(byteToCheck, fileByteStream))) + { + return false; + } + } + + foreach (var byteCheckWithoutOffset in _anywhereByteChecksStrict) + { + var found = false; + + for (var index = 1; index <= fileByteStream.Length; index++) + { + if (byteCheckWithoutOffset.Cast() + .SequenceEqual(fileByteStream.Skip(index).Take(byteCheckWithoutOffset.Length))) + { + found = true; + break; + } + } + + if (!found) + { + return false; + } + } + + foreach (var tc in _tailContainsChecksStrict) + { + if (!CheckTailContains(tc, fileByteStream)) + { + return false; + } + } + + return true; + } + + if (type == FileByteType.Lazy) + { + foreach (var neededByteCheck in _neededByteChecksDefault) + { + if (!CheckBytes(neededByteCheck, fileByteStream)) + { + return false; + } + } + + foreach (var oneOf in _oneOfEachByteChecksDefault) + { + if (!oneOf.Any(byteToCheck => CheckBytes(byteToCheck, fileByteStream))) + { + return false; + } + } + + 
foreach (var byteCheckWithoutOffset in _anywhereByteChecksDefault) + { + var found = false; + + for (var index = 1; index <= fileByteStream.Length; index++) + { + if (byteCheckWithoutOffset.Cast() + .SequenceEqual(fileByteStream.Skip(index).Take(byteCheckWithoutOffset.Length))) + { + found = true; + break; + } + } + + if (!found) + { + return false; + } + } + + foreach (var tc in _tailContainsChecksDefault) + { + if (!CheckTailContains(tc, fileByteStream)) + { + return false; + } + } + } + return true; } - public FileByteFilter StartsWith(byte?[] bytesToCheck) + private List SelectNeededChecks(FileByteType? type) { - _neededByteChecks.Add(new ByteCheck(0, bytesToCheck)); + if (type == FileByteType.Strict) + { + return _neededByteChecksStrict; + } + + if (type == FileByteType.Lazy) + { + return _neededByteChecksDefault; + } + + return _neededByteChecks; + } + + private List SelectOneOfEachChecks(FileByteType? type) + { + if (type == FileByteType.Strict) + { + return _oneOfEachByteChecksStrict; + } + + if (type == FileByteType.Lazy) + { + return _oneOfEachByteChecksDefault; + } + + return _oneOfEachByteChecks; + } + + private List SelectAnywhereChecks(FileByteType? type) + { + if (type == FileByteType.Strict) + { + return _anywhereByteChecksStrict; + } + + if (type == FileByteType.Lazy) + { + return _anywhereByteChecksDefault; + } + + return _anywhereByteChecks; + } + + private List SelectTailContainsChecks(FileByteType? type) + { + if (type == FileByteType.Strict) + { + return _tailContainsChecksStrict; + } + + if (type == FileByteType.Lazy) + { + return _tailContainsChecksDefault; + } + + return _tailContainsChecks; + } + + public FileByteFilter StartsWith(byte?[] bytesToCheck, FileByteType? type = null) + { + SelectNeededChecks(type).Add(new ByteCheck(0, bytesToCheck)); return this; } - public FileByteFilter StartsWithAnyOf(byte?[][] bytesToCheck) + public FileByteFilter StartsWithAnyOf(byte?[][] bytesToCheck, FileByteType? 
type = null) { - _oneOfEachByteChecks.Add(bytesToCheck.Select(byteArray => new ByteCheck(0, byteArray)).ToArray()); + SelectOneOfEachChecks(type).Add(bytesToCheck.Select(byteArray => new ByteCheck(0, byteArray)).ToArray()); return this; } - public FileByteFilter EndsWith(byte?[] bytesToCheck) + public FileByteFilter EndsWith(byte?[] bytesToCheck, FileByteType? type = null) { - _neededByteChecks.Add(new ByteCheck(-bytesToCheck.Length, bytesToCheck)); + SelectNeededChecks(type).Add(new ByteCheck(-bytesToCheck.Length, bytesToCheck)); return this; } - public FileByteFilter EndsWithAnyOf(byte?[][] bytesToCheck) + public FileByteFilter EndsWithAnyOf(byte?[][] bytesToCheck, FileByteType? type = null) { - _oneOfEachByteChecks.Add(bytesToCheck.Select(byteArray => new ByteCheck(-byteArray.Length, byteArray)).ToArray()); + SelectOneOfEachChecks(type).Add(bytesToCheck.Select(byteArray => new ByteCheck(-byteArray.Length, byteArray)).ToArray()); return this; } - public FileByteFilter Anywhere(byte?[] bytesToCheck) + public FileByteFilter Anywhere(byte?[] bytesToCheck, FileByteType? type = null) { - _anywhereByteChecks.Add(bytesToCheck); + SelectAnywhereChecks(type).Add(bytesToCheck); return this; } - public FileByteFilter Anywhere(byte?[][] bytesToCheck) + public FileByteFilter Anywhere(byte?[][] bytesToCheck, FileByteType? type = null) { foreach (var byteArrayToCheck in bytesToCheck) { - Anywhere(byteArrayToCheck); + Anywhere(byteArrayToCheck, type); } return this; } - public FileByteFilter Specific(ByteCheck bytesToCheck) + public FileByteFilter Specific(ByteCheck bytesToCheck, FileByteType? type = null) { - _neededByteChecks.Add(bytesToCheck); + SelectNeededChecks(type).Add(bytesToCheck); return this; } - public FileByteFilter SpecificAnyOf(ByteCheck[] bytesToCheck) + public FileByteFilter SpecificAnyOf(ByteCheck[] bytesToCheck, FileByteType? 
type = null) { - _oneOfEachByteChecks.Add(bytesToCheck.Select(byteArray => byteArray).ToArray()); + SelectOneOfEachChecks(type).Add(bytesToCheck.Select(byteCheck => byteCheck).ToArray()); + return this; + } + + public FileByteFilter TailContains(int lastNBytes, byte?[] bytesToCheck, FileByteType? type = null) + { + SelectTailContainsChecks(type).Add(new TailContainsCheck(lastNBytes, bytesToCheck)); return this; } @@ -130,7 +317,9 @@ private bool CheckBytes(ByteCheck byteToCheck, byte[] fileStreamToCheck) // Check ending of file stream // since in the current format we have the fileStream Length only here calculate the offset if (byteToCheck.Offset < 0) + { byteToCheck.Offset = fileStreamToCheck.Length - byteToCheck.ByteArray.Length; + } if (fileStreamToCheck.Length - Math.Abs(byteToCheck.Offset) < byteToCheck.ByteArray.Length) { @@ -152,4 +341,45 @@ private bool CheckBytes(ByteCheck byteToCheck, byte[] fileStreamToCheck) return true; } -} \ No newline at end of file + + private static bool CheckTailContains(TailContainsCheck check, byte[] fileStreamToCheck) + { + var pattern = check.Pattern; + + if (pattern.Length == 0) + { + return true; + } + + var start = Math.Max(0, fileStreamToCheck.Length - check.LastNBytes); + var tailLength = fileStreamToCheck.Length - start; + + if (tailLength < pattern.Length) + { + return false; + } + + for (var tailOffset = 0; tailOffset <= tailLength - pattern.Length; tailOffset++) + { + var ok = true; + + for (var patternIndex = 0; patternIndex < pattern.Length; patternIndex++) + { + var b = pattern[patternIndex]; + + if (b.HasValue && fileStreamToCheck[start + tailOffset + patternIndex] != b.Value) + { + ok = false; + break; + } + } + + if (ok) + { + return true; + } + } + + return false; + } +} diff --git a/MagicBytesValidator/Models/FileByteType.cs b/MagicBytesValidator/Models/FileByteType.cs new file mode 100644 index 0000000..dd88f8d --- /dev/null +++ b/MagicBytesValidator/Models/FileByteType.cs @@ -0,0 +1,7 @@ +namespace 
MagicBytesValidator.Models; + +public enum FileByteType +{ + Strict = 0, + Lazy = 1 +} diff --git a/MagicBytesValidator/Models/IFileType.cs b/MagicBytesValidator/Models/IFileType.cs index b4aaa9b..5db8c7f 100644 --- a/MagicBytesValidator/Models/IFileType.cs +++ b/MagicBytesValidator/Models/IFileType.cs @@ -21,5 +21,5 @@ public interface IFileType /// /// Returns whether a given file (as byte array) matches the file type /// - public bool Matches(byte[] fileByteStream); -} \ No newline at end of file + public bool Matches(byte[] fileByteStream, FileByteType type = FileByteType.Lazy); +} diff --git a/MagicBytesValidator/Services/Http/FormFileTypeProvider.cs b/MagicBytesValidator/Services/Http/FormFileTypeProvider.cs index 89a05ee..b9926e5 100644 --- a/MagicBytesValidator/Services/Http/FormFileTypeProvider.cs +++ b/MagicBytesValidator/Services/Http/FormFileTypeProvider.cs @@ -3,88 +3,81 @@ /// public class FormFileTypeProvider : IFormFileTypeProvider { - private const char _FILE_EXTENSION_SEPARATOR = '.'; + private const char FileExtensionSeparator = '.'; - /// - public Mapping Mapping { get; } + /// + public Mapping Mapping { get; } - private readonly IValidator _validator; + private readonly IValidator _validator; - public FormFileTypeProvider( - Mapping? mapping = null, - IValidator? validator = null - ) - { - Mapping = mapping ?? new Mapping(); - _validator = validator ?? new Validator(Mapping); - } + public FormFileTypeProvider( + Mapping? mapping = null, + IValidator? validator = null + ) + { + Mapping = mapping ?? new Mapping(); + _validator = validator ?? new Validator(Mapping); + } - /// - [Obsolete("Use FindValidatedType instead.")] - public IFileType? FindFileTypeForFormFile(IFormFile formFile) - { - /* If the form file has a file name with an extension, we'll try to find the fileType by it first. - * If not, we'll try loading it by its given content type. */ - var fileType = formFile.FileName.Contains(_FILE_EXTENSION_SEPARATOR) - ? 
Mapping.FindByExtension(formFile.FileName.Split(_FILE_EXTENSION_SEPARATOR).Last()) - : Mapping.FindByMimeType(formFile.ContentType); + /// + [Obsolete("Use FindValidatedType instead.")] + public IFileType? FindFileTypeForFormFile(IFormFile formFile) + { + /* If the form file has a file name with an extension, we'll try to find the fileType by it first. + * If not, we'll try loading it by its given content type. */ + var fileType = formFile.FileName.Contains(FileExtensionSeparator) + ? Mapping.FindByExtension(formFile.FileName.Split(FileExtensionSeparator).Last()) + : Mapping.FindByMimeType(formFile.ContentType); - if (fileType is null) - { - /* We don't know about the files' extension or MIME type. */ - return null; - } + if (fileType is null) + { + /* We don't know about the files' extension or MIME type. */ + return null; + } - if (fileType.MimeTypes.Contains(formFile.ContentType) == false) - { - /* This can only occur if the given form file has a file name and its extension indicates a different - * MIME type as (also given) Content-Type. This *can* be an indicator that someone is trying to - * mess with us. As we are a bit paranoid and also the file type is not unambiguous, we'll throw. */ - throw new MimeTypeMismatchException(fileType.MimeTypes, formFile.ContentType); - } + if (fileType.MimeTypes.Contains(formFile.ContentType) == false) + { + /* This can only occur if the given form file has a file name and its extension indicates a different + * MIME type as (also given) Content-Type. This *can* be an indicator that someone is trying to + * mess with us. As we are a bit paranoid and also the file type is not unambiguous, we'll throw. */ + throw new MimeTypeMismatchException(fileType.MimeTypes, formFile.ContentType); + } - return fileType; - } + return fileType; + } - /// - public async Task FindValidatedTypeAsync( - IFormFile formFile, - Stream? 
formFileStream, - CancellationToken cancellationToken - ) - { - var fileTypeByContentType = Mapping.FindByMimeType(formFile.ContentType); - if (fileTypeByContentType is null) - { - return null; - } + public async Task FindValidatedTypeAsync( + IFormFile formFile, + Stream? formFileStream, + CancellationToken cancellationToken, + FileByteType validationType = FileByteType.Strict + ) + { + var fileTypeByContentType = Mapping.FindByMimeType(formFile.ContentType); + if (fileTypeByContentType is null) + { + return null; + } - var fileTypeByExtension = formFile.FileName.Contains(_FILE_EXTENSION_SEPARATOR) - ? Mapping.FindByExtension(formFile.FileName.Split(_FILE_EXTENSION_SEPARATOR).Last()) - : null; + var fileTypeByExtension = formFile.FileName.Contains(FileExtensionSeparator) + ? Mapping.FindByExtension(formFile.FileName.Split(FileExtensionSeparator).Last()) + : null; - if ( - fileTypeByExtension is not null - && fileTypeByExtension.GetType() != fileTypeByContentType.GetType() - ) - { - /* This can only occur if the given form file has a file name and its extension indicates a different - * MIME type as (also given) Content-Type. This *can* be an indicator that someone is trying to - * mess with us. As we are a bit paranoid and also the file type is not unambiguous, we'll throw. */ - throw new MimeTypeMismatchException(fileTypeByExtension.MimeTypes, formFile.ContentType); - } + if (fileTypeByExtension is not null + && fileTypeByExtension.GetType() != fileTypeByContentType.GetType()) + { + throw new MimeTypeMismatchException(fileTypeByExtension.MimeTypes, formFile.ContentType); + } - var contentIsValid = await _validator.IsValidAsync( - formFileStream ?? formFile.OpenReadStream(), - fileTypeByContentType, - cancellationToken - ); + var contentIsValid = await _validator.IsValidAsync( + formFileStream ?? 
formFile.OpenReadStream(), + fileTypeByContentType, + cancellationToken, + validationType + ); - if (!contentIsValid) - { - throw new MimeTypeMismatchException(formFile.ContentType); - } - - return fileTypeByContentType; - } -} \ No newline at end of file + return !contentIsValid + ? throw new MimeTypeMismatchException(formFile.ContentType) + : fileTypeByContentType; + } +} diff --git a/MagicBytesValidator/Services/Http/IFormFileTypeProvider.cs b/MagicBytesValidator/Services/Http/IFormFileTypeProvider.cs index c282920..1586c30 100644 --- a/MagicBytesValidator/Services/Http/IFormFileTypeProvider.cs +++ b/MagicBytesValidator/Services/Http/IFormFileTypeProvider.cs @@ -5,43 +5,48 @@ /// public interface IFormFileTypeProvider { - /// - /// Mapping that is used for providing information - /// - Mapping Mapping { get; } + /// + /// Mapping that is used for providing information + /// + Mapping Mapping { get; } - /// - /// Tries to find matching FileType for given IFormFile. - /// - /// - /// When file-type by extension and given content-type (IFormFile.ContentType) differ. - /// In this case, someone could try to circumvent the validation. - /// - [Obsolete("Use FindValidatedType instead.")] - IFileType? FindFileTypeForFormFile(IFormFile formFile); + /// + /// Tries to find matching FileType for given IFormFile. + /// + /// + /// When file-type by extension and given content-type (IFormFile.ContentType) differ. + /// In this case, someone could try to circumvent the validation. + /// + [Obsolete("Use FindValidatedType instead.")] + IFileType? FindFileTypeForFormFile(IFormFile formFile); - /// - /// Tries to find matching for given that also matches - /// the content of the form file. - /// - /// that the should be found for - /// - /// Optional. If the file stream for the form file is already loaded, it can be included here. - /// This prevents opening a read stream for the same file multiple times. 
- /// However, never include streams of other files than the given form file! Otherwise the validation may be - /// wrong and could be circumvented! - /// - /// CancellationToken - /// - /// that matches by the form files content type, content (and extension, if given) - /// - /// - /// When file-type by extension and given content-type (IFormFile.ContentType) differ. - /// In this case, someone could try to circumvent the validation. - /// - Task FindValidatedTypeAsync( - IFormFile formFile, - Stream? formFileStream, - CancellationToken cancellationToken - ); -} \ No newline at end of file + /// + /// Tries to find matching for given that also matches + /// the content of the form file. + /// + /// that the should be found for + /// + /// Optional. If the file stream for the form file is already loaded, it can be included here. + /// This prevents opening a read stream for the same file multiple times. + /// However, never include streams of other files than the given form file! Otherwise the validation may be + /// wrong and could be circumvented! + /// + /// CancellationToken + /// + /// Optional. Controls the validation strictness (e.g. strict vs. relaxed rules for certain formats). + /// Defaults to . + /// + /// + /// that matches by the form files content type, content (and extension, if given) + /// + /// + /// When file-type by extension and given content-type (IFormFile.ContentType) differ. + /// In this case, someone could try to circumvent the validation. + /// + Task FindValidatedTypeAsync( + IFormFile formFile, + Stream? 
formFileStream, + CancellationToken cancellationToken, + FileByteType validationType = FileByteType.Strict + ); +} diff --git a/MagicBytesValidator/Services/IValidator.cs b/MagicBytesValidator/Services/IValidator.cs index 95eed2a..7259a46 100644 --- a/MagicBytesValidator/Services/IValidator.cs +++ b/MagicBytesValidator/Services/IValidator.cs @@ -10,5 +10,10 @@ public interface IValidator /// /// Validates a given file-Stream against a given FileType and returns if the Stream is valid or not. /// - Task IsValidAsync(Stream fileStream, IFileType fileType, CancellationToken cancellationToken); -} \ No newline at end of file + Task IsValidAsync( + Stream fileStream, + IFileType fileType, + CancellationToken cancellationToken, + FileByteType validationType = FileByteType.Strict + ); +} diff --git a/MagicBytesValidator/Services/Validator.cs b/MagicBytesValidator/Services/Validator.cs index d51a8ef..d2ee805 100644 --- a/MagicBytesValidator/Services/Validator.cs +++ b/MagicBytesValidator/Services/Validator.cs @@ -2,25 +2,30 @@ public class Validator : IValidator { - /// - public Mapping Mapping { get; } + /// + public Mapping Mapping { get; } - public Validator(Mapping? mapping = null) - { - Mapping = mapping ?? new Mapping(); - } + public Validator(Mapping? mapping = null) + { + Mapping = mapping ?? 
new Mapping(); + } - /// - public async Task IsValidAsync(Stream fileStream, IFileType fileType, CancellationToken cancellationToken) - { - var previousStreamPosition = fileStream.Position; - fileStream.Position = 0; + /// + public async Task IsValidAsync( + Stream fileStream, + IFileType fileType, + CancellationToken cancellationToken, + FileByteType validationType = FileByteType.Strict + ) + { + var previousStreamPosition = fileStream.Position; + fileStream.Position = 0; - var streamBuffer = new byte[fileStream.Length]; - _ = await fileStream.ReadAsync(streamBuffer, cancellationToken); + var streamBuffer = new byte[fileStream.Length]; + _ = await fileStream.ReadAsync(streamBuffer, cancellationToken); - fileStream.Position = previousStreamPosition; + fileStream.Position = previousStreamPosition; - return fileType.Matches(streamBuffer); - } -} \ No newline at end of file + return fileType.Matches(streamBuffer, validationType); + } +} diff --git a/README.md b/README.md index 0aa243d..4c0a358 100644 --- a/README.md +++ b/README.md @@ -7,16 +7,16 @@ The existing `IFileType`s can be expanded in various ways. - Install nuget package into your project: ```powershell -Install-Package MagicBytesValidator -Version 2.1.6 +Install-Package MagicBytesValidator -Version 2.2.0 ``` ```bash -dotnet add package MagicBytesValidator --version 2.1.6 +dotnet add package MagicBytesValidator --version 2.2.0 ``` - Reference in your csproj: ```xml - + ``` ### How to use it? @@ -47,6 +47,140 @@ var fileType = await streamFileTypeProvider.TryFindUnambiguousAsync(fileStream, ```c# var isValid = await validator.IsValidAsync(memoryStream, fileType, CancellationToken.None); ``` +## Validation strictness (FileByteType) + +Some formats support multiple validation strategies (e.g. strict vs. lazy rules). 
+For this purpose, the library exposes `FileByteType`: + +- `FileByteType.Strict` (default for `IsValidAsync` and `FindValidatedTypeAsync`; `IFileType.Matches` itself defaults to `Lazy`) +- `FileByteType.Lazy` (optional relaxed/"lazy" rules for certain formats) + +### Validate an uploaded IFormFile with a specific validation type + +The form file provider accepts an optional `validationType` parameter: + +```c# +var fileType = await formFileTypeProvider.FindValidatedTypeAsync( + formFile, + null, + CancellationToken.None, + validationType: MagicBytesValidator.Models.FileByteType.Strict +); +``` + +Example using `Lazy` (lazy rules if the format supports it): + +```c# +var fileType = await formFileTypeProvider.FindValidatedTypeAsync( + formFile, + null, + CancellationToken.None, + validationType: MagicBytesValidator.Models.FileByteType.Lazy +); +``` + +### Validate a stream with a specific validation type + +The validator also accepts an optional `validationType` parameter: + +```c# +var isValid = await validator.IsValidAsync( + memoryStream, + fileType, + CancellationToken.None, + validationType: MagicBytesValidator.Models.FileByteType.Strict +); +``` + +Example using `Lazy`: + +```c# +var isValid = await validator.IsValidAsync( + memoryStream, + fileType, + CancellationToken.None, + validationType: MagicBytesValidator.Models.FileByteType.Lazy +); +``` + +> Note: If a format does not define any `Lazy`-specific checks, `Lazy` behaves like “global checks only”. +> This keeps existing formats unchanged unless they opt into mode-specific rules. + +### Example: Lazy ("relaxed") PDF validation + +Some PDFs contain additional trailing bytes after the `%%EOF` marker. While strict validation requires the file +to end with one of the known `%%EOF` terminator sequences, lazy validation accepts the `%%EOF` marker anywhere within the last 1024 bytes of the file +(behaviour tolerated by common PDF viewers). 
+ +## Expand the file type mapping + +- Get mapping: + +```csharp +// use the validator: +var mapping = validator.Mapping; + +// use the formFileTypeProvider: +var mapping = formFileTypeProvider.Mapping; + +// or create a new instance of the mapping: +var mapping = new MagicBytesValidator.Services.Mapping(); +``` + +- Register a single `FileByteFilter`: + +```csharp +mapping.Register( + new FileByteFilter( + "traperto/trp", // MIME type + new[] { "trp" } // file extensions + ) { + // magic byte sequences + StartsWith(new byte?[] + { + 0x78, 0x6c, 0x2f, 0x5f, 0x72, 0x65 + }) + .EndsWith(new byte?[] + { + 0xFF, 0xFF + }) + } +); +``` + +- `FileByteFilter`s with specific offset checks: + +```csharp +mapping.Register( + new FileByteFilter( + "traperto/trp", // MIME type + new[] { "trp" } // file extensions + ) { + // magic byte sequences + Specific(new ByteCheck(512, new byte?[] { 0xFD })); + } +); +``` + +`ByteCheck` allows for negative offset values to look for a specific offset counting from the end of file. + +### Optional: register mode-specific magic byte checks (Strict/Lazy) + +When configuring a `FileByteFilter`, fluent methods accept an optional `FileByteType` parameter. +If omitted, the check is global (applies to all validation types). If specified, the check applies only +to that validation type. 
+ +Example: + +```csharp +StartsWith(new byte?[] { 0x25, 0x50, 0x44, 0x46, 0x2D }) // global + .EndsWithAnyOf(new[] + { + new byte?[] { 0x25, 0x25, 0x45, 0x4F, 0x46 } + }, MagicBytesValidator.Models.FileByteType.Strict) // strict only + .TailContains(1024, new byte?[] { 0x25, 0x25, 0x45, 0x4F, 0x46 }, + MagicBytesValidator.Models.FileByteType.Lazy); // lazy only +``` #### Expand the file type mapping @@ -59,7 +193,7 @@ var mapping = validator.Mapping; var mapping = formFileTypeProvider.Mapping; // or create a new instance of the mapping: -var mapping = new MagicBytesValidator.Services.Mapping(); +var mapping = new MagicBytesValidator.Services.Mapping(); ``` - Register a single `FileByteFilter`: @@ -191,4 +325,4 @@ This can be useful when debugging or validating newly added FileTypes. ▓▓ ▓▓▓▓▓▓▓▓▓▓ ▓▓ ▓▓▓▓▓▓▓▓▓ ▓▓ -``` \ No newline at end of file +```