diff --git a/Sources/FoundationEssentials/String/String+Comparison.swift b/Sources/FoundationEssentials/String/String+Comparison.swift index e322e59de..2520f2c8a 100644 --- a/Sources/FoundationEssentials/String/String+Comparison.swift +++ b/Sources/FoundationEssentials/String/String+Comparison.swift @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -extension UTF8.CodeUnit { +package extension UTF8.CodeUnit { static let newline: Self = 0x0A static let carriageReturn: Self = 0x0D diff --git a/Sources/FoundationInternationalization/CMakeLists.txt b/Sources/FoundationInternationalization/CMakeLists.txt index b3e6feb11..48379841b 100644 --- a/Sources/FoundationInternationalization/CMakeLists.txt +++ b/Sources/FoundationInternationalization/CMakeLists.txt @@ -17,7 +17,8 @@ add_library(FoundationInternationalization Date+ICU.swift Duration+Utils.swift RangeExpression.swift - TimeInterval+Utils.swift) + TimeInterval+Utils.swift + URLParser+ICU.swift) add_subdirectory(Calendar) add_subdirectory(Formatting) diff --git a/Sources/FoundationInternationalization/URLParser+ICU.swift b/Sources/FoundationInternationalization/URLParser+ICU.swift new file mode 100644 index 000000000..e41cc84de --- /dev/null +++ b/Sources/FoundationInternationalization/URLParser+ICU.swift @@ -0,0 +1,211 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +#if canImport(FoundationEssentials) +import FoundationEssentials +#endif + +internal import _FoundationICU + +internal final class UIDNAHookICU: UIDNAHook { + // `Sendable` notes: `UIDNA` from ICU is thread safe. + struct UIDNAPointer : @unchecked Sendable { + init(_ ptr: OpaquePointer?) { self.idnaTranscoder = ptr } + var idnaTranscoder: OpaquePointer? + } + + private static func U_SUCCESS(_ x: Int32) -> Bool { + return x <= U_ZERO_ERROR.rawValue + } + + private static let idnaTranscoder: UIDNAPointer? = { + var status = U_ZERO_ERROR + let options = UInt32( + UIDNA_CHECK_BIDI | + UIDNA_CHECK_CONTEXTJ | + UIDNA_NONTRANSITIONAL_TO_UNICODE | + UIDNA_NONTRANSITIONAL_TO_ASCII + ) + let encoder = uidna_openUTS46(options, &status) + guard U_SUCCESS(status.rawValue) else { + return nil + } + return UIDNAPointer(encoder) + }() + + private static func shouldAllow(_ errors: UInt32, encodeToASCII: Bool) -> Bool { + let allowedErrors: UInt32 + if encodeToASCII { + allowedErrors = 0 + } else { + allowedErrors = UInt32( + UIDNA_ERROR_EMPTY_LABEL | + UIDNA_ERROR_LABEL_TOO_LONG | + UIDNA_ERROR_DOMAIN_NAME_TOO_LONG | + UIDNA_ERROR_LEADING_HYPHEN | + UIDNA_ERROR_TRAILING_HYPHEN | + UIDNA_ERROR_HYPHEN_3_4 + ) + } + return errors & ~allowedErrors == 0 + } + + /// Type of `uidna_nameToASCII` and `uidna_nameToUnicode` functions + private typealias TranscodingFunction = (OpaquePointer?, UnsafePointer?, Int32, UnsafeMutablePointer?, Int32, UnsafeMutablePointer?, UnsafeMutablePointer?) -> Int32 + + private static func IDNACodedHost( + hostBuffer: UnsafeBufferPointer, + transcode: TranscodingFunction, + allowErrors: (UInt32) -> Bool, + createString: (UnsafeMutablePointer, Int) -> String? + ) -> String? { + let maxHostBufferLength = 2048 + if hostBuffer.count > maxHostBufferLength { + return nil + } + + guard let transcoder = idnaTranscoder else { + return nil + } + + let result: String? = withUnsafeTemporaryAllocation(of: T.self, capacity: maxHostBufferLength) { outBuffer in + var processingDetails = UIDNAInfo( + size: Int16(MemoryLayout.size), + isTransitionalDifferent: 0, + reservedB3: 0, + errors: 0, + reservedI2: 0, + reservedI3: 0 + ) + var error = U_ZERO_ERROR + + let hostBufferPtr = hostBuffer.baseAddress! + let outBufferPtr = outBuffer.baseAddress! + + let charsConverted = transcode( + transcoder.idnaTranscoder, + hostBufferPtr, + Int32(hostBuffer.count), + outBufferPtr, + Int32(outBuffer.count), + &processingDetails, + &error + ) + + if U_SUCCESS(error.rawValue), allowErrors(processingDetails.errors), charsConverted > 0 { + return createString(outBufferPtr, Int(charsConverted)) + } + return nil + } + return result + } + + private static func IDNACodedHostUTF8(_ utf8Buffer: UnsafeBufferPointer, encodeToASCII: Bool) -> String? { + var transcode = uidna_nameToUnicodeUTF8 + if encodeToASCII { + transcode = uidna_nameToASCII_UTF8 + } + return utf8Buffer.withMemoryRebound(to: CChar.self) { charBuffer in + return IDNACodedHost( + hostBuffer: charBuffer, + transcode: transcode, + allowErrors: { errors in + shouldAllow(errors, encodeToASCII: encodeToASCII) + }, + createString: { ptr, count in + let outBuffer = UnsafeBufferPointer(start: ptr, count: count).withMemoryRebound(to: UInt8.self) { $0 } + var hostsAreEqual = false + if outBuffer.count == utf8Buffer.count { + hostsAreEqual = true + for i in 0.., encodeToASCII: Bool) -> String? { + var transcode = uidna_nameToUnicode + if encodeToASCII { + transcode = uidna_nameToASCII + } + return IDNACodedHost( + hostBuffer: utf16Buffer, + transcode: transcode, + allowErrors: { errors in + shouldAllow(errors, encodeToASCII: encodeToASCII) + }, + createString: { ptr, count in + let outBuffer = UnsafeBufferPointer(start: ptr, count: count) + var hostsAreEqual = false + if outBuffer.count == utf16Buffer.count { + hostsAreEqual = true + for i in 0.. String? { + let fastResult = host.utf8.withContiguousStorageIfAvailable { + IDNACodedHostUTF8($0, encodeToASCII: encodeToASCII) + } + if let fastResult { + return fastResult + } + #if FOUNDATION_FRAMEWORK + if let fastCharacters = host._ns._fastCharacterContents() { + let charsBuffer = UnsafeBufferPointer(start: fastCharacters, count: host._ns.length) + return IDNACodedHostUTF16(charsBuffer, encodeToASCII: encodeToASCII) + } + #endif + var hostString = String(host) + return hostString.withUTF8 { + IDNACodedHostUTF8($0, encodeToASCII: encodeToASCII) + } + } + + static func encode(_ host: some StringProtocol) -> String? { + return IDNACodedHost(host, encodeToASCII: true) + } + + static func decode(_ host: some StringProtocol) -> String? { + return IDNACodedHost(host, encodeToASCII: false) + } + +} diff --git a/Tests/FoundationInternationalizationTests/URLTests+UIDNA.swift b/Tests/FoundationInternationalizationTests/URLTests+UIDNA.swift new file mode 100644 index 000000000..142e66bdf --- /dev/null +++ b/Tests/FoundationInternationalizationTests/URLTests+UIDNA.swift @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// + +#if FOUNDATION_FRAMEWORK +@testable import Foundation +#else +@testable import FoundationEssentials +@testable import FoundationInternationalization +#endif // FOUNDATION_FRAMEWORK + +#if canImport(TestSupport) +import TestSupport +#endif + +final class URLUIDNATests: XCTestCase { + func testURLHostUIDNAEncoding() { + let emojiURL = URL(string: "https://i❤️tacos.ws/🏳️‍🌈/冰淇淋") + let emojiURLEncoded = "https://xn--itacos-i50d.ws/%F0%9F%8F%B3%EF%B8%8F%E2%80%8D%F0%9F%8C%88/%E5%86%B0%E6%B7%87%E6%B7%8B" + XCTAssertEqual(emojiURL?.absoluteString, emojiURLEncoded) + XCTAssertEqual(emojiURL?.host(percentEncoded: false), "xn--itacos-i50d.ws") + + let chineseURL = URL(string: "http://見.香港/热狗/🌭") + let chineseURLEncoded = "http://xn--nw2a.xn--j6w193g/%E7%83%AD%E7%8B%97/%F0%9F%8C%AD" + XCTAssertEqual(chineseURL?.absoluteString, chineseURLEncoded) + XCTAssertEqual(chineseURL?.host(percentEncoded: false), "xn--nw2a.xn--j6w193g") + } +}