From 2736cfdc361fa594dc393c8f9725d932caf072ef Mon Sep 17 00:00:00 2001 From: Richard Davison Date: Tue, 7 May 2024 16:36:54 +0200 Subject: [PATCH 1/4] URL util functions --- Cargo.lock | 1 + llrt_core/Cargo.toml | 1 + llrt_core/src/modules/http/url.rs | 168 +++++++++++++++--- .../src/modules/http/url_search_params.rs | 7 +- llrt_core/src/modules/url.rs | 26 ++- llrt_core/src/vm.rs | 9 +- tests/unit/http.test.ts | 98 +++++++++- 7 files changed, 271 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 405107f0c4..32be1aa819 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -910,6 +910,7 @@ dependencies = [ "hyper", "hyper-rustls", "hyper-util", + "idna", "itoa", "jwalk", "libc", diff --git a/llrt_core/Cargo.toml b/llrt_core/Cargo.toml index 7e06888d9b..b94dc09ce0 100644 --- a/llrt_core/Cargo.toml +++ b/llrt_core/Cargo.toml @@ -75,6 +75,7 @@ rand = "0.8.5" uname = "0.1.1" flate2 = { version = "1.0.30", features = ["zlib-ng"], default-features = false } brotlic = "0.8.2" +idna = "0.5.0" [build-dependencies] rquickjs = { version = "0.5.1", features = [ diff --git a/llrt_core/src/modules/http/url.rs b/llrt_core/src/modules/http/url.rs index 1b44a0920a..ed7e0748aa 100644 --- a/llrt_core/src/modules/http/url.rs +++ b/llrt_core/src/modules/http/url.rs @@ -1,3 +1,5 @@ +use std::{path::PathBuf, str::FromStr}; + // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use rquickjs::{ @@ -5,7 +7,7 @@ use rquickjs::{ class::{Trace, Tracer}, function::Opt, prelude::This, - Class, Coerced, Ctx, Exception, FromJs, Function, Result, Value, + Class, Coerced, Ctx, Exception, FromJs, Function, Object, Result, Value, }; use url::Url; @@ -90,27 +92,7 @@ impl<'js> URL<'js> { } pub fn to_string(&self) -> String { - let search = search_params_to_string(&self.search_params); - let hash = &self.hash; - let hash = if !hash.is_empty() { - format!("#{}", &hash) - } else { - String::from("") - }; - let mut user_info = String::new(); - if !self.username.is_empty() { - user_info.push_str(&self.username); - if !self.password.is_empty() { - user_info.push(':'); - user_info.push_str(&self.password) - } - user_info.push('@') - } - - format!( - "{}://{}{}{}{}{}", - &self.protocol, user_info, &self.host, &self.pathname, &search, &hash - ) + self.format(true, true, true, false) } #[qjs(get)] @@ -309,6 +291,47 @@ impl<'js> URL<'js> { self.host.clone_from(&self.hostname); } } + + fn format( + &self, + include_auth: bool, + include_fragment: bool, + include_search: bool, + unicode_encode: bool, + ) -> String { + let search = if include_search { + search_params_to_string(&self.search_params) + } else { + String::from("") + }; + let hash = &self.hash; + let hash = if include_fragment && !hash.is_empty() { + format!("#{}", &hash) + } else { + String::from("") + }; + + let mut user_info = String::new(); + if include_auth && !self.username.is_empty() { + user_info.push_str(&self.username); + if !self.password.is_empty() { + user_info.push(':'); + user_info.push_str(&self.password) + } + user_info.push('@') + } + + let host = if unicode_encode { + domain_to_unicode(&self.host) + } else { + self.host.clone() + }; + + format!( + "{}://{}{}{}{}{}", + &self.protocol, user_info, host, &self.pathname, &search, &hash + ) + } } fn filtered_port(protocol: &str, port: &str) -> Option { @@ -357,3 +380,104 @@ fn split_colon<'js>(ctx: &Ctx, s: &'js str) -> Result<(&'js str, &'js str)> { } Ok((first, second)) } + +pub fn url_to_http_options<'js>(ctx: Ctx<'js>, url: Class<'js, URL<'js>>) -> Result> { + let obj = Object::new(ctx)?; + + let url = url.borrow(); + + let port = url.port(); + let username = url.username(); + let search = url.search(); + let hash = url.hash(); + + obj.set("protocol", url.protocol())?; + obj.set("hostname", url.hostname())?; + + if !hash.is_empty() { + obj.set("hash", url.hash())?; + } + if !search.is_empty() { + obj.set("search", url.search())?; + } + + obj.set("pathname", url.pathname())?; + obj.set("path", format!("{}{}", url.pathname(), url.search()))?; + obj.set("href", url.href())?; + + if !username.is_empty() { + obj.set("auth", format!("{}:{}", username, url.password()))?; + } + + if !port.is_empty() { + obj.set("port", url.port())?; + } + + Ok(obj) +} + +pub fn domain_to_unicode<'js>(domain: &str) -> String { + let (url, result) = idna::domain_to_unicode(domain); + if result.is_err() { + return String::from(""); + } + url +} + +pub fn domain_to_ascii(domain: &String) -> String { + idna::domain_to_ascii(domain).unwrap_or_default() +} + +//options are ignored, no windows support yet +pub fn path_to_file_url<'js>(ctx: Ctx<'js>, path: String, _: Opt) -> Result> { + let url = Url::from_file_path(path).unwrap(); + URL::create(ctx, url) +} + +//options are ignored, no windows support yet +pub fn file_url_to_path<'js>(ctx: Ctx<'js>, url: Value<'js>) -> Result { + let url_string = if let Ok(url) = Class::::from_value(url.clone()) { + url.borrow().to_string() + } else { + url.get::>()?.to_string() + }; + + let path = if let Some(path) = &url_string.strip_prefix("file://") { + path.to_string() + } else { + url_string + }; + + Ok(PathBuf::from_str(&path) + .or_throw(&ctx)? + .to_string_lossy() + .to_string()) +} + +pub fn url_format<'js>(url: Class<'js, URL<'js>>, options: Opt>) -> Result { + let mut fragment = true; + let mut unicode = false; + let mut auth = true; + let mut search = true; + + // Parse options if provided + if let Some(options) = options.0 { + if options.is_object() { + let options = options.as_object().unwrap(); + if let Some(value) = options.get("fragment")? { + fragment = value; + } + if let Ok(value) = options.get("unicode") { + unicode = value; + } + if let Ok(value) = options.get("auth") { + auth = value; + } + if let Ok(value) = options.get("search") { + search = value + } + } + } + + Ok(url.borrow().format(auth, fragment, search, unicode)) +} diff --git a/llrt_core/src/modules/http/url_search_params.rs b/llrt_core/src/modules/http/url_search_params.rs index 591bd832a4..04f90a19c7 100644 --- a/llrt_core/src/modules/http/url_search_params.rs +++ b/llrt_core/src/modules/http/url_search_params.rs @@ -125,8 +125,11 @@ impl URLSearchParams { let mut string = String::with_capacity(self.params.len() * 2); for (i, (key, value)) in self.params.iter().enumerate() { string.push_str(&escape(key)); - string.push('='); - string.push_str(&escape(value)); + if !value.is_empty() { + string.push('='); + string.push_str(&escape(value)); + } + if i < length - 1 { string.push('&'); } diff --git a/llrt_core/src/modules/url.rs b/llrt_core/src/modules/url.rs index 472506614b..4df1deff73 100644 --- a/llrt_core/src/modules/url.rs +++ b/llrt_core/src/modules/url.rs @@ -1,19 +1,29 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use rquickjs::{ - function::Constructor, + function::{Constructor, Func}, module::{Declarations, Exports, ModuleDef}, Ctx, Result, }; use crate::{module_builder::ModuleInfo, modules::module::export_default}; + +use super::http::url::{ + domain_to_ascii, domain_to_unicode, file_url_to_path, path_to_file_url, url_format, + url_to_http_options, +}; pub struct UrlModule; impl ModuleDef for UrlModule { fn declare(declare: &mut Declarations) -> Result<()> { declare.declare(stringify!(URL))?; declare.declare(stringify!(URLSearchParams))?; - + declare.declare("urlToHttpOptions")?; + declare.declare("domainToUnicode")?; + declare.declare("domainToASCII")?; + declare.declare("fileURLToPath")?; + declare.declare("pathToFileURL")?; + declare.declare("format")?; declare.declare("default")?; Ok(()) } @@ -26,6 +36,18 @@ impl ModuleDef for UrlModule { export_default(ctx, exports, |default| { default.set(stringify!(URL), url)?; default.set(stringify!(URLSearchParams), url_search_params)?; + default.set("urlToHttpOptions", Func::from(url_to_http_options))?; + default.set( + "domainToUnicode", + Func::from(|domain: String| domain_to_unicode(&domain)), + )?; + default.set( + "domainToASCII", + Func::from(|domain: String| domain_to_ascii(&domain)), + )?; + default.set("fileURLToPath", Func::from(file_url_to_path))?; + default.set("pathToFileURL", Func::from(path_to_file_url))?; + default.set("format", Func::from(url_format))?; Ok(()) })?; diff --git a/llrt_core/src/vm.rs b/llrt_core/src/vm.rs index 55c67ed084..c1dd9c56f2 100644 --- a/llrt_core/src/vm.rs +++ b/llrt_core/src/vm.rs @@ -619,10 +619,11 @@ fn init(ctx: &Ctx<'_>, module_names: HashSet<&'static str>) -> Result<()> { "require", Func::from(move |ctx, specifier: String| -> Result { let LifetimeArgs(ctx) = LifetimeArgs(ctx); - let specifier: String = specifier - .strip_prefix("node:") - .unwrap_or(specifier.as_str()) - .into(); + let specifier = if let Some(striped_specifier) = &specifier.strip_prefix("node:") { + striped_specifier.to_string() + } else { + specifier + }; let import_name = if module_names.contains(specifier.as_str()) || BYTECODE_CACHE.contains_key(&specifier) || specifier.starts_with('/') diff --git a/tests/unit/http.test.ts b/tests/unit/http.test.ts index 2c3237054a..bef29aad40 100644 --- a/tests/unit/http.test.ts +++ b/tests/unit/http.test.ts @@ -1,42 +1,44 @@ -import * as url from "url"; +import * as urlModule from "url"; describe("URL module import", () => { it("global URL and imported URL are equal", () => { const testUrl = "https://www.example.com"; - const moduleUrl = new url.URL(testUrl); + const moduleUrl = new urlModule.URL(testUrl); const globalUrl = new URL(testUrl); expect(moduleUrl).toEqual(globalUrl); }); it("global URLSearchParams and imported URLSearchParams are equal", () => { const paramsString = "topic=api&a=1&a=2&a=3"; - const moduleSearchParams = new url.URLSearchParams(paramsString); + const moduleSearchParams = new urlModule.URLSearchParams(paramsString); const globalSearchParams = new URLSearchParams(paramsString); expect(moduleSearchParams).toEqual(globalSearchParams); }); describe("import { URL } from 'url';", () => { it("should parse a url hostname", () => { - const testUrl = new url.URL("https://www.example.com"); + const testUrl = new urlModule.URL("https://www.example.com"); expect(testUrl.protocol).toEqual("https:"); expect(testUrl.host).toEqual("www.example.com"); expect(testUrl.hostname).toEqual("www.example.com"); }); it("toString method works", () => { - const testUrl = new url.URL("/base", "https://www.example.com"); + const testUrl = new urlModule.URL("/base", "https://www.example.com"); expect(testUrl.toString()).toEqual("https://www.example.com/base"); }); it("canParse method works", () => { - const validCanParse = url.URL.canParse("https://www.example.com"); - const invalidCanParse = url.URL.canParse("not_valid"); + const validCanParse = urlModule.URL.canParse("https://www.example.com"); + const invalidCanParse = urlModule.URL.canParse("not_valid"); expect(validCanParse).toEqual(true); expect(invalidCanParse).toEqual(false); - expect(url.URL.canParse("/foo", "https://example.org/")).toEqual(true); + expect(urlModule.URL.canParse("/foo", "https://example.org/")).toEqual( + true + ); }); }); describe("import { URLSearchParams } from 'url';", () => { it("supports URLSearchParams basic API", () => { const paramsString = "topic=api&a=1&a=2&a=3"; - const searchParams = new url.URLSearchParams(paramsString); + const searchParams = new urlModule.URLSearchParams(paramsString); searchParams.append("foo", "bar"); expect(searchParams.has("topic")).toBeTruthy(); expect(searchParams.has("foo")).toBeTruthy(); @@ -542,3 +544,81 @@ describe("Blob class", () => { expect(slicedBlob.type).toEqual("text/plain"); }); }); + +describe("URL Utility Functions", () => { + it("converts URL object to http options with urlToHttpOptions", () => { + const url = new URL( + "https://user:password@example.com:8080/path/to/file?param1=value1¶m2=value2#fragment" + ); + const options = urlModule.urlToHttpOptions(url); + + expect(options).toEqual({ + protocol: "https:", + hostname: "example.com", + hash: "fragment", + search: "?param1=value1¶m2=value2", + pathname: "/path/to/file", + path: "/path/to/file?param1=value1¶m2=value2", + href: "https://user:password@example.com:8080/path/to/file?param1=value1¶m2=value2#fragment", + auth: "user:password", + port: "8080", + }); + }); + + it("handles URL without credentials or port with urlToHttpOptions", () => { + const url = new URL("http://example.com/path/to/file"); + const options = urlModule.urlToHttpOptions(url); + + expect(options).toEqual({ + protocol: "http:", + hostname: "example.com", + pathname: "/path/to/file", + path: "/path/to/file", + href: "http://example.com/path/to/file", + }); + }); + + it("converts punycode domain to unicode with domainToUnicode", () => { + const unicodeDomain = urlModule.domainToUnicode("xn--d1mi3b5c.com"); + expect(unicodeDomain).toBe("㶠㶤㷀㶱.com"); + }); + + it("handles already unicode domain with domainToUnicode", () => { + const unicodeDomain = urlModule.domainToUnicode("example.com"); + expect(unicodeDomain).toBe("example.com"); + }); + + it("converts unicode domain to punycode with domainToASCII", () => { + const asciiDomain = urlModule.domainToASCII("example.com"); + expect(asciiDomain).toBe("example.com"); // No conversion needed + }); + + it("converts non-ASCII domain to punycode with domainToASCII", () => { + const asciiDomain = urlModule.domainToASCII("مثال.com"); + + expect(asciiDomain).toBe("xn--mgbh0fb.com"); + }); + + it("converts file URL to system path with fileURLToPath", () => { + const url = new URL("file:///path/to/file.txt"); + const path = urlModule.fileURLToPath(url); + + expect(path).toBe("/path/to/file.txt"); // Platform specific path handling might differ + }); + + it("converts system path to file URL with pathToFileURL", () => { + const url = urlModule.pathToFileURL("/path/to/file.txt"); + + expect(url.href).toBe("file:///path/to/file.txt"); // Platform specific path handling might differ + }); + + it("formats URL object into a string with format", () => { + const url = new URL("https://a:b@測試?abc#foo"); + + expect(url.href).toBe("https://a:b@xn--g6w251d/?abc#foo"); + + expect( + urlModule.format(url, { fragment: false, unicode: true, auth: false }) + ).toBe("https://測試/?abc"); + }); +}); From b23508d1ed99a1df8c8f200e572918f51ea5f77b Mon Sep 17 00:00:00 2001 From: Richard Davison Date: Tue, 7 May 2024 16:43:03 +0200 Subject: [PATCH 2/4] Update API docs --- API.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/API.md b/API.md index 60a10e5826..ae82035308 100644 --- a/API.md +++ b/API.md @@ -17,8 +17,7 @@ Everything else inherited from [Uint8Array](https://developer.mozilla.org/en-US/ ## child_process -> [!WARNING] -> `spawn` uses native streams that is not 100% compatible with the Node.js Streams API. +> [!WARNING] > `spawn` uses native streams that is not 100% compatible with the Node.js Streams API. [spawn](https://nodejs.org/api/child_process.html#child_processspawncommand-args-options) @@ -115,8 +114,7 @@ export function decode(value: string): Uint8Array; [createRequire](https://nodejs.org/api/module.html#modulecreaterequirefilename) -> [!NOTE] -> `require` is available from esm modules natively. This function is just for compatibility +> [!NOTE] > `require` is available from esm modules natively. This function is just for compatibility ## os @@ -190,8 +188,6 @@ export class URL { } ``` -### TODO, URL see tracking [ticket](https://github.com/awslabs/llrt/issues/303): - ```typescript // Additional utilities in the URL module export function domainToASCII(domain: string): string; @@ -202,11 +198,15 @@ export function fileURLToPath(url: string | URL): string; export function pathToFileURL(path: string): URL; +export function format(url: string | URL, options?: { fragment?: boolean, unicode?: boolean, auth?: boolean +}): string; + export function urlToHttpOptions(url: URL): { protocol?: string; hostname?: string; port?: string; path?: string; + ... }; ``` From a58a29a8de640522eb3abc10e1e8ddb39bf6f248 Mon Sep 17 00:00:00 2001 From: Richard Davison Date: Tue, 7 May 2024 20:56:15 +0200 Subject: [PATCH 3/4] clippy --- llrt_core/src/modules/http/url.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llrt_core/src/modules/http/url.rs b/llrt_core/src/modules/http/url.rs index ed7e0748aa..78e0b190ec 100644 --- a/llrt_core/src/modules/http/url.rs +++ b/llrt_core/src/modules/http/url.rs @@ -416,7 +416,7 @@ pub fn url_to_http_options<'js>(ctx: Ctx<'js>, url: Class<'js, URL<'js>>) -> Res Ok(obj) } -pub fn domain_to_unicode<'js>(domain: &str) -> String { +pub fn domain_to_unicode(domain: &str) -> String { let (url, result) = idna::domain_to_unicode(domain); if result.is_err() { return String::from(""); @@ -424,7 +424,7 @@ pub fn domain_to_unicode<'js>(domain: &str) -> String { url } -pub fn domain_to_ascii(domain: &String) -> String { +pub fn domain_to_ascii(domain: &str) -> String { idna::domain_to_ascii(domain).unwrap_or_default() } From 608e2852fa8de4a3af1ec1fa95aa62b472e778b5 Mon Sep 17 00:00:00 2001 From: Richard Davison Date: Wed, 8 May 2024 07:32:52 +0200 Subject: [PATCH 4/4] Fix markdown --- API.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/API.md b/API.md index ae82035308..363c98c18d 100644 --- a/API.md +++ b/API.md @@ -17,7 +17,8 @@ Everything else inherited from [Uint8Array](https://developer.mozilla.org/en-US/ ## child_process -> [!WARNING] > `spawn` uses native streams that is not 100% compatible with the Node.js Streams API. +> [!WARNING] +> `spawn` uses native streams that is not 100% compatible with the Node.js Streams API. [spawn](https://nodejs.org/api/child_process.html#child_processspawncommand-args-options) @@ -114,7 +115,8 @@ export function decode(value: string): Uint8Array; [createRequire](https://nodejs.org/api/module.html#modulecreaterequirefilename) -> [!NOTE] > `require` is available from esm modules natively. This function is just for compatibility +> [!NOTE] +> `require` is available from esm modules natively. This function is just for compatibility ## os