diff --git a/src/Umbraco.Infrastructure/PropertyEditors/GridPropertyEditor.cs b/src/Umbraco.Infrastructure/PropertyEditors/GridPropertyEditor.cs index e0140b65b38e..f87590884951 100644 --- a/src/Umbraco.Infrastructure/PropertyEditors/GridPropertyEditor.cs +++ b/src/Umbraco.Infrastructure/PropertyEditors/GridPropertyEditor.cs @@ -224,7 +224,9 @@ public GridPropertyValueEditor( if (html is not null) { - var parseAndSavedTempImages = _pastedImages.FindAndPersistPastedTempImages(html, mediaParentId, userId, _imageUrlGenerator); + var parseAndSaveBase64Images = _pastedImages.FindAndPersistEmbeddedImages( + html, mediaParentId, userId); + var parseAndSavedTempImages = _pastedImages.FindAndPersistPastedTempImages(parseAndSaveBase64Images, mediaParentId, userId); var editorValueWithMediaUrlsRemoved = _imageSourceParser.RemoveImageSources(parseAndSavedTempImages); rte.Value = editorValueWithMediaUrlsRemoved; } diff --git a/src/Umbraco.Infrastructure/PropertyEditors/RichTextEditorPastedImages.cs b/src/Umbraco.Infrastructure/PropertyEditors/RichTextEditorPastedImages.cs index 5044a5b13e95..8dbe6ad5b351 100644 --- a/src/Umbraco.Infrastructure/PropertyEditors/RichTextEditorPastedImages.cs +++ b/src/Umbraco.Infrastructure/PropertyEditors/RichTextEditorPastedImages.cs @@ -1,8 +1,14 @@ // Copyright (c) Umbraco. // See LICENSE for more details. +using System.Text; +using System.Text.RegularExpressions; +using System.Web; using HtmlAgilityPack; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Umbraco.Cms.Core.Configuration.Models; using Umbraco.Cms.Core.Exceptions; using Umbraco.Cms.Core.Hosting; using Umbraco.Cms.Core.IO; @@ -13,6 +19,7 @@ using Umbraco.Cms.Core.Services; using Umbraco.Cms.Core.Strings; using Umbraco.Cms.Core.Web; +using Umbraco.Cms.Web.Common.DependencyInjection; using Umbraco.Extensions; namespace Umbraco.Cms.Core.PropertyEditors; @@ -30,7 +37,11 @@ public sealed class RichTextEditorPastedImages private readonly IShortStringHelper _shortStringHelper; private readonly IUmbracoContextAccessor _umbracoContextAccessor; private readonly string _tempFolderAbsolutePath; + private readonly IImageUrlGenerator _imageUrlGenerator; + private readonly ContentSettings _contentSettings; + private readonly Dictionary _uploadedImages = new(); + [Obsolete("Use the ctor which takes an IImageUrlGenerator and IOptions instead, scheduled for removal in v14")] public RichTextEditorPastedImages( IUmbracoContextAccessor umbracoContextAccessor, ILogger logger, @@ -41,6 +52,33 @@ public RichTextEditorPastedImages( MediaUrlGeneratorCollection mediaUrlGenerators, IShortStringHelper shortStringHelper, IPublishedUrlProvider publishedUrlProvider) + : this( + umbracoContextAccessor, + logger, + hostingEnvironment, + mediaService, + contentTypeBaseServiceProvider, + mediaFileManager, + mediaUrlGenerators, + shortStringHelper, + publishedUrlProvider, + StaticServiceProvider.Instance.GetRequiredService(), + StaticServiceProvider.Instance.GetRequiredService>()) + { + } + + public RichTextEditorPastedImages( + IUmbracoContextAccessor umbracoContextAccessor, + ILogger logger, + IHostingEnvironment hostingEnvironment, + IMediaService mediaService, + IContentTypeBaseServiceProvider contentTypeBaseServiceProvider, + MediaFileManager mediaFileManager, + MediaUrlGeneratorCollection mediaUrlGenerators, + IShortStringHelper shortStringHelper, + IPublishedUrlProvider publishedUrlProvider, + IImageUrlGenerator imageUrlGenerator, + IOptions contentSettings) { _umbracoContextAccessor = umbracoContextAccessor ?? throw new ArgumentNullException(nameof(umbracoContextAccessor)); @@ -53,152 +91,267 @@ public RichTextEditorPastedImages( _mediaUrlGenerators = mediaUrlGenerators; _shortStringHelper = shortStringHelper; _publishedUrlProvider = publishedUrlProvider; + _imageUrlGenerator = imageUrlGenerator; + _contentSettings = contentSettings.Value; _tempFolderAbsolutePath = _hostingEnvironment.MapPathContentRoot(Constants.SystemDirectories.TempImageUploads); - } /// - /// Used by the RTE (and grid RTE) for drag/drop/persisting images + /// Used by the RTE (and grid RTE) for converting inline base64 images to Media items. /// - public string FindAndPersistPastedTempImages(string html, Guid mediaParentFolder, int userId, IImageUrlGenerator imageUrlGenerator) + /// HTML from the Rich Text Editor property editor. + /// + /// + /// Formatted HTML. + /// Thrown if image extension is not allowed + internal string FindAndPersistEmbeddedImages(string html, Guid mediaParentFolder, int userId) { // Find all img's that has data-tmpimg attribute // Use HTML Agility Pack - https://html-agility-pack.net var htmlDoc = new HtmlDocument(); htmlDoc.LoadHtml(html); - HtmlNodeCollection? tmpImages = htmlDoc.DocumentNode.SelectNodes($"//img[@{TemporaryImageDataAttribute}]"); - if (tmpImages == null || tmpImages.Count == 0) + HtmlNodeCollection? imagesWithDataUris = htmlDoc.DocumentNode.SelectNodes("//img"); + if (imagesWithDataUris is null || imagesWithDataUris.Count is 0) { return html; } - // An array to contain a list of URLs that - // we have already processed to avoid dupes - var uploadedImages = new Dictionary(); - - - foreach (HtmlNode? img in tmpImages) + foreach (HtmlNode? img in imagesWithDataUris) { - // The data attribute contains the path to the tmp img to persist as a media item - var tmpImgPath = img.GetAttributeValue(TemporaryImageDataAttribute, string.Empty); + var srcValue = img.GetAttributeValue("src", string.Empty); - if (string.IsNullOrEmpty(tmpImgPath)) + // Ignore src-less images + if (string.IsNullOrEmpty(srcValue)) { continue; } + // Take only images that have a "data:image" uri into consideration + if (!srcValue.StartsWith("data:image")) + { + continue; + } - var absoluteTempImagePath = Path.GetFullPath(_hostingEnvironment.MapPathContentRoot(tmpImgPath)); + // Create tmp image by scanning the srcValue + // the value will look like "data:image/jpg;base64,abc" where the first part + // is the mimetype and the second (after the comma) is the image blob + Match dataUriInfo = Regex.Match(srcValue, @"^data:\w+\/(?\w+)[\w\+]*?;(?\w+),(?.+)$"); - if (IsValidPath(absoluteTempImagePath) == false) + // If it turns up false, it was probably a false-positive and we can't do anything with it + if (dataUriInfo.Success is false) { continue; } - var fileName = Path.GetFileName(absoluteTempImagePath); - var safeFileName = fileName.ToSafeFileName(_shortStringHelper); - - var mediaItemName = safeFileName.ToFriendlyName(); - IMedia mediaFile; - GuidUdi udi; + var ext = dataUriInfo.Groups["ext"].Value.ToLowerInvariant(); + var encoding = dataUriInfo.Groups["encoding"].Value.ToLowerInvariant(); + var imageData = dataUriInfo.Groups["data"].Value; - if (uploadedImages.ContainsKey(tmpImgPath) == false) + if (_contentSettings.IsFileAllowedForUpload(ext) is false) { - if (mediaParentFolder == Guid.Empty) - { - mediaFile = _mediaService.CreateMedia(mediaItemName, Constants.System.Root, Constants.Conventions.MediaTypes.Image, userId); - } - else - { - mediaFile = _mediaService.CreateMedia(mediaItemName, mediaParentFolder, Constants.Conventions.MediaTypes.Image, userId); - } + // If the image format is not supported we should probably leave it be + // since the user decided to include it. + // If we accepted it anyway, they could technically circumvent the allow list for file types, + // but the user experience would not be very good if we simply failed to save the content. + // Besides, there may be other types of data uri images technically supported by a browser that we cannot handle. + _logger.LogWarning( + "Performance impact: Could not convert embedded image to a Media item because the file extension {Ext} was not allowed. HTML extract: {OuterHtml}", + ext, + img.OuterHtml.Length < 100 ? img.OuterHtml : img.OuterHtml[..100]); // only log the first 100 chars because base64 images can be very long + continue; + } - var fileInfo = new FileInfo(absoluteTempImagePath); + // Create an unique folder path to help with concurrent users to avoid filename clash + var imageTempPath = + _hostingEnvironment.MapPathContentRoot(Constants.SystemDirectories.TempImageUploads + Path.DirectorySeparatorChar + Guid.NewGuid()); - FileStream? fileStream = fileInfo.OpenReadWithRetry(); - if (fileStream == null) - { - throw new InvalidOperationException("Could not acquire file stream"); - } - - using (fileStream) - { - mediaFile.SetValue(_mediaFileManager, _mediaUrlGenerators, _shortStringHelper, _contentTypeBaseServiceProvider, Constants.Conventions.Media.File, safeFileName, fileStream); - } + // Ensure image temp path exists + if (Directory.Exists(imageTempPath) is false) + { + Directory.CreateDirectory(imageTempPath); + } - _mediaService.Save(mediaFile, userId); + // To get the filename, we simply manipulate the mimetype into a filename + var filePath = $"image.{ext}"; + var safeFileName = filePath.ToSafeFileName(_shortStringHelper); + var tmpImgPath = imageTempPath + Path.DirectorySeparatorChar + safeFileName; + var absoluteTempImagePath = Path.GetFullPath(tmpImgPath); - udi = mediaFile.GetUdi(); + // Convert the base64 content to a byte array and save the bytes directly to a file + // this method should work for most use-cases + if (encoding.Equals("base64")) + { + System.IO.File.WriteAllBytes(absoluteTempImagePath, Convert.FromBase64String(imageData)); } else { - // Already been uploaded & we have it's UDI - udi = uploadedImages[tmpImgPath]; + System.IO.File.WriteAllText(absoluteTempImagePath, HttpUtility.HtmlDecode(imageData), Encoding.UTF8); } - // Add the UDI to the img element as new data attribute - img.SetAttributeValue("data-udi", udi.ToString()); + // When the temp file has been created, we can persist it + PersistMediaItem(mediaParentFolder, userId, img, tmpImgPath); + } + + return htmlDoc.DocumentNode.OuterHtml; + } + + /// + /// Used by the RTE (and grid RTE) for drag/drop/persisting images. + /// + /// HTML from the Rich Text Editor property editor. + /// + /// + /// + /// Formatted HTML. + [Obsolete("It is not needed to supply the imageUrlGenerator parameter")] + public string FindAndPersistPastedTempImages(string html, Guid mediaParentFolder, int userId, IImageUrlGenerator imageUrlGenerator) => + FindAndPersistPastedTempImages(html, mediaParentFolder, userId); + + /// + /// Used by the RTE (and grid RTE) for drag/drop/persisting images. + /// + /// HTML from the Rich Text Editor property editor. + /// + /// + /// Formatted HTML. + public string FindAndPersistPastedTempImages(string html, Guid mediaParentFolder, int userId) + { + // Find all img's that has data-tmpimg attribute + // Use HTML Agility Pack - https://html-agility-pack.net + var htmlDoc = new HtmlDocument(); + htmlDoc.LoadHtml(html); + + HtmlNodeCollection? tmpImages = htmlDoc.DocumentNode.SelectNodes($"//img[@{TemporaryImageDataAttribute}]"); + if (tmpImages is null || tmpImages.Count is 0) + { + return html; + } + + foreach (HtmlNode? img in tmpImages) + { + // The data attribute contains the path to the tmp img to persist as a media item + var tmpImgPath = img.GetAttributeValue(TemporaryImageDataAttribute, string.Empty); - // Get the new persisted image URL - _umbracoContextAccessor.TryGetUmbracoContext(out IUmbracoContext? umbracoContext); - IPublishedContent? mediaTyped = umbracoContext?.Media?.GetById(udi.Guid); - if (mediaTyped == null) + if (string.IsNullOrEmpty(tmpImgPath)) { - throw new PanicException( - $"Could not find media by id {udi.Guid} or there was no UmbracoContext available."); + continue; } - var location = mediaTyped.Url(_publishedUrlProvider); + var qualifiedTmpImgPath = _hostingEnvironment.MapPathContentRoot(tmpImgPath); - // Find the width & height attributes as we need to set the imageprocessor QueryString - var width = img.GetAttributeValue("width", int.MinValue); - var height = img.GetAttributeValue("height", int.MinValue); + PersistMediaItem(mediaParentFolder, userId, img, qualifiedTmpImgPath); + } - if (width != int.MinValue && height != int.MinValue) + return htmlDoc.DocumentNode.OuterHtml; + } + + private void PersistMediaItem(Guid mediaParentFolder, int userId, HtmlNode img, string qualifiedTmpImgPath) + { + var absoluteTempImagePath = Path.GetFullPath(qualifiedTmpImgPath); + + if (IsValidPath(absoluteTempImagePath) is false) + { + return; + } + + var fileName = Path.GetFileName(absoluteTempImagePath); + var safeFileName = fileName.ToSafeFileName(_shortStringHelper); + + var mediaItemName = safeFileName.ToFriendlyName(); + GuidUdi udi; + + if (_uploadedImages.ContainsKey(qualifiedTmpImgPath) is false) + { + var isSvg = qualifiedTmpImgPath.EndsWith(".svg"); + var mediaType = isSvg + ? Constants.Conventions.MediaTypes.VectorGraphicsAlias + : Constants.Conventions.MediaTypes.Image; + + IMedia mediaFile = mediaParentFolder == Guid.Empty + ? _mediaService.CreateMedia(mediaItemName, Constants.System.Root, mediaType, userId) + : _mediaService.CreateMedia(mediaItemName, mediaParentFolder, mediaType, userId); + + var fileInfo = new FileInfo(absoluteTempImagePath); + + FileStream? fileStream = fileInfo.OpenReadWithRetry(); + if (fileStream is null) { - location = imageUrlGenerator.GetImageUrl(new ImageUrlGenerationOptions(location) - { - ImageCropMode = ImageCropMode.Max, - Width = width, - Height = height, - }); + throw new InvalidOperationException("Could not acquire file stream"); } - img.SetAttributeValue("src", location); + using (fileStream) + { + mediaFile.SetValue(_mediaFileManager, _mediaUrlGenerators, _shortStringHelper, + _contentTypeBaseServiceProvider, Constants.Conventions.Media.File, safeFileName, fileStream); + } + + _mediaService.Save(mediaFile, userId); + + udi = mediaFile.GetUdi(); + } + else + { + // Already been uploaded & we have it's UDI + udi = _uploadedImages[qualifiedTmpImgPath]; + } - // Remove the data attribute (so we do not re-process this) - img.Attributes.Remove(TemporaryImageDataAttribute); + // Add the UDI to the img element as new data attribute + img.SetAttributeValue("data-udi", udi.ToString()); - // Add to the dictionary to avoid dupes - if (uploadedImages.ContainsKey(tmpImgPath) == false) + // Get the new persisted image URL + _umbracoContextAccessor.TryGetUmbracoContext(out IUmbracoContext? umbracoContext); + IPublishedContent? mediaTyped = umbracoContext?.Media?.GetById(udi.Guid); + if (mediaTyped is null) + { + throw new PanicException( + $"Could not find media by id {udi.Guid} or there was no UmbracoContext available."); + } + + var location = mediaTyped.Url(_publishedUrlProvider); + + // Find the width & height attributes as we need to set the imageprocessor QueryString + var width = img.GetAttributeValue("width", int.MinValue); + var height = img.GetAttributeValue("height", int.MinValue); + + if (width != int.MinValue && height != int.MinValue) + { + location = _imageUrlGenerator.GetImageUrl(new ImageUrlGenerationOptions(location) { - uploadedImages.Add(tmpImgPath, udi); + ImageCropMode = ImageCropMode.Max, + Width = width, + Height = height, + }); + } - // Delete folder & image now its saved in media - // The folder should contain one image - as a unique guid folder created - // for each image uploaded from TinyMceController - var folderName = Path.GetDirectoryName(absoluteTempImagePath); - try - { - if (folderName is not null) - { - Directory.Delete(folderName, true); - } - } - catch (Exception ex) + img.SetAttributeValue("src", location); + + // Remove the data attribute (so we do not re-process this) + img.Attributes.Remove(TemporaryImageDataAttribute); + + // Add to the dictionary to avoid dupes + if (_uploadedImages.ContainsKey(qualifiedTmpImgPath) is false) + { + _uploadedImages.Add(qualifiedTmpImgPath, udi); + + // Delete folder & image now its saved in media + // The folder should contain one image - as a unique guid folder created + // for each image uploaded from TinyMceController + var folderName = Path.GetDirectoryName(absoluteTempImagePath); + try + { + if (folderName is not null) { - _logger.LogError(ex, "Could not delete temp file or folder {FileName}", absoluteTempImagePath); + Directory.Delete(folderName, true); } } + catch (Exception ex) + { + _logger.LogError(ex, "Could not delete temp file or folder {FileName}", absoluteTempImagePath); + } } - - return htmlDoc.DocumentNode.OuterHtml; } - private bool IsValidPath(string imagePath) - { - return imagePath.StartsWith(_tempFolderAbsolutePath); - } + private bool IsValidPath(string imagePath) => imagePath.StartsWith(_tempFolderAbsolutePath); } diff --git a/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyEditor.cs b/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyEditor.cs index 21313b3b773c..c11498af7abe 100644 --- a/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyEditor.cs +++ b/src/Umbraco.Infrastructure/PropertyEditors/RichTextPropertyEditor.cs @@ -293,8 +293,10 @@ public IEnumerable GetReferences(object? value) return null; } + var parseAndSaveBase64Images = _pastedImages.FindAndPersistEmbeddedImages( + editorValue.Value.ToString()!, mediaParentId, userId); var parseAndSavedTempImages = - _pastedImages.FindAndPersistPastedTempImages(editorValue.Value.ToString()!, mediaParentId, userId, _imageUrlGenerator); + _pastedImages.FindAndPersistPastedTempImages(parseAndSaveBase64Images, mediaParentId, userId); var editorValueWithMediaUrlsRemoved = _imageSourceParser.RemoveImageSources(parseAndSavedTempImages); var parsed = MacroTagParser.FormatRichTextContentForPersistence(editorValueWithMediaUrlsRemoved); var sanitized = _htmlSanitizer.Sanitize(parsed); diff --git a/tests/Umbraco.Tests.Integration/Umbraco.Web.BackOffice/Controllers/ContentControllerTests.cs b/tests/Umbraco.Tests.Integration/Umbraco.Web.BackOffice/Controllers/ContentControllerTests.cs index 70db8b3ce258..d1a3caa2ea15 100644 --- a/tests/Umbraco.Tests.Integration/Umbraco.Web.BackOffice/Controllers/ContentControllerTests.cs +++ b/tests/Umbraco.Tests.Integration/Umbraco.Web.BackOffice/Controllers/ContentControllerTests.cs @@ -664,4 +664,87 @@ public async Task PostSave_Checks_Ancestors_For_Domains() Assert.AreEqual(0, display.Notifications.Count(x => x.NotificationType == NotificationStyle.Warning)); }); } + + [TestCase( + @"

", + false)] + [TestCase( + @"

"">

", + false)] + [TestCase( + @"

", + false)] + [TestCase( + @"

", + true)] + public async Task PostSave_Simple_RichText_With_Base64(string html, bool shouldHaveDataUri) + { + var url = PrepareApiControllerUrl(x => x.PostSave(null)); + + var dataTypeService = GetRequiredService(); + var contentService = GetRequiredService(); + var contentTypeService = GetRequiredService(); + + var dataType = new DataTypeBuilder() + .WithId(0) + .WithoutIdentity() + .WithDatabaseType(ValueStorageType.Ntext) + .AddEditor() + .WithAlias(Constants.PropertyEditors.Aliases.TinyMce) + .Done() + .Build(); + + dataTypeService.Save(dataType); + + var contentType = new ContentTypeBuilder() + .WithId(0) + .AddPropertyType() + .WithDataTypeId(dataType.Id) + .WithAlias("richText") + .WithName("Rich Text") + .Done() + .WithContentVariation(ContentVariation.Nothing) + .Build(); + + contentTypeService.Save(contentType); + + var content = new ContentBuilder() + .WithId(0) + .WithName("Invariant") + .WithContentType(contentType) + .AddPropertyData() + .WithKeyValue("richText", html) + .Done() + .Build(); + contentService.SaveAndPublish(content); + var model = new ContentItemSaveBuilder() + .WithContent(content) + .Build(); + + // Act + var response = + await Client.PostAsync(url, new MultipartFormDataContent {{new StringContent(JsonConvert.SerializeObject(model)), "contentItem"}}); + + // Assert + var body = await response.Content.ReadAsStringAsync(); + + body = body.TrimStart(AngularJsonMediaTypeFormatter.XsrfPrefix); + + Assert.Multiple(() => + { + Assert.AreEqual(HttpStatusCode.OK, response.StatusCode, body); + var display = JsonConvert.DeserializeObject(body); + var bodyText = display.Variants.FirstOrDefault()?.Tabs.FirstOrDefault()?.Properties + ?.FirstOrDefault(x => x.Alias.Equals("richText"))?.Value?.ToString(); + Assert.NotNull(bodyText); + + var containsDataUri = bodyText.Contains("data:image"); + if (shouldHaveDataUri) + { + Assert.True(containsDataUri, $"Data URIs were expected to be found in the body: {bodyText}"); + } else { + Assert.False(containsDataUri, $"Data URIs were not expected to be found in the body: {bodyText}"); + } + }); + } } diff --git a/tests/Umbraco.Tests.Integration/appsettings.Tests.json b/tests/Umbraco.Tests.Integration/appsettings.Tests.json index 101b1a1aef1e..1d4bbf18ef86 100644 --- a/tests/Umbraco.Tests.Integration/appsettings.Tests.json +++ b/tests/Umbraco.Tests.Integration/appsettings.Tests.json @@ -1,4 +1,5 @@ { + "$schema": "./appsettings-schema.json", "Logging": { "LogLevel": { "Default": "Warning", @@ -16,5 +17,12 @@ "EmptyDatabasesCount": 2, "SQLServerMasterConnectionString": "" } + }, + "Umbraco": { + "CMS": { + "Content": { + "AllowedUploadedFileExtensions": ["jpg", "png", "gif", "svg"] + } + } } }