From 18640a0be441cf5962e4df82f4ffab0b2f5e98f2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 10:54:42 -0500 Subject: [PATCH 1/6] perf: remove heap allocation in parse_host --- url/src/host.rs | 66 +++++++++++++++++++++++++++++++++++------------ url/src/lib.rs | 9 ++++--- url/src/parser.rs | 38 +++++++++++++-------------- url/src/quirks.rs | 4 +-- 4 files changed, 75 insertions(+), 42 deletions(-) diff --git a/url/src/host.rs b/url/src/host.rs index f45232c5..7699da97 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -10,7 +10,6 @@ use crate::net::{Ipv4Addr, Ipv6Addr}; use alloc::borrow::Cow; use alloc::borrow::ToOwned; use alloc::string::String; -use alloc::string::ToString; use alloc::vec::Vec; use core::cmp; use core::fmt::{self, Formatter}; @@ -30,8 +29,8 @@ pub(crate) enum HostInternal { Ipv6(Ipv6Addr), } -impl From> for HostInternal { - fn from(host: Host) -> HostInternal { +impl From>> for HostInternal { + fn from(host: Host>) -> HostInternal { match host { Host::Domain(ref s) if s.is_empty() => HostInternal::None, Host::Domain(_) => HostInternal::Domain, @@ -75,11 +74,8 @@ impl Host<&str> { } } -impl Host { - /// Parse a host: either an IPv6 address in [] square brackets, or a domain. - /// - /// - pub fn parse(input: &str) -> Result { +impl<'a> Host> { + pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result { if input.starts_with('[') { if !input.ends_with(']') { return Err(ParseError::InvalidIpv6Address); @@ -87,8 +83,15 @@ impl Host { return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6); } let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into(); + let domain: Cow<'a, [u8]> = match domain { + Cow::Borrowed(_) => match input { + Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()), + Cow::Owned(input) => Cow::Owned(input.into_bytes()), + }, + Cow::Owned(v) => Cow::Owned(v), + }; - let domain = Self::domain_to_ascii(&domain)?; + let domain = domain_to_ascii(domain)?; if domain.is_empty() { return Err(ParseError::EmptyHost); @@ -98,12 +101,11 @@ impl Host { let address = parse_ipv4addr(&domain)?; Ok(Host::Ipv4(address)) } else { - Ok(Host::Domain(domain.to_string())) + Ok(Host::Domain(domain)) } } - // - pub fn parse_opaque(input: &str) -> Result { + pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result { if input.starts_with('[') { if !input.ends_with(']') { return Err(ParseError::InvalidIpv6Address); @@ -137,17 +139,49 @@ impl Host { Err(ParseError::InvalidDomainCharacter) } else { Ok(Host::Domain( - utf8_percent_encode(input, CONTROLS).to_string(), + match utf8_percent_encode(&input, CONTROLS).into() { + Cow::Owned(v) => Cow::Owned(v), + Cow::Borrowed(_) => input, + }, )) } } - /// convert domain with idna - fn domain_to_ascii(domain: &[u8]) -> Result, ParseError> { - idna::domain_to_ascii_cow(domain, idna::AsciiDenyList::URL).map_err(Into::into) + pub(crate) fn into_owned(self) -> Host { + match self { + Host::Domain(s) => Host::Domain(s.into_owned()), + Host::Ipv4(ip) => Host::Ipv4(ip), + Host::Ipv6(ip) => Host::Ipv6(ip), + } } } +impl Host { + /// Parse a host: either an IPv6 address in [] square brackets, or a domain. + /// + /// + pub fn parse(input: &str) -> Result { + Host::>::parse_cow(input.into()).map(|i| i.into_owned()) + } + + // + pub fn parse_opaque(input: &str) -> Result { + Host::>::parse_opaque_cow(input.into()).map(|i| i.into_owned()) + } +} + +/// convert domain with idna +fn domain_to_ascii(domain: Cow<'_, [u8]>) -> Result, ParseError> { + let value = idna::domain_to_ascii_cow(&domain, idna::AsciiDenyList::URL)?; + Ok(match value { + Cow::Owned(value) => Cow::Owned(value), + Cow::Borrowed(_) => match domain { + Cow::Borrowed(value) => unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(value)) }, + Cow::Owned(value) => unsafe { String::from_utf8_unchecked(value).into() }, + }, + }) +} + impl> fmt::Display for Host { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { diff --git a/url/src/lib.rs b/url/src/lib.rs index 96fa1eee..798536e9 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -183,6 +183,7 @@ use core::fmt::Write; use core::ops::{Range, RangeFrom, RangeTo}; use core::{cmp, fmt, hash, mem}; use percent_encoding::utf8_percent_encode; +use std::borrow::Cow; #[cfg(feature = "std")] #[cfg(any( unix, @@ -2032,9 +2033,9 @@ impl Url { } } if SchemeType::from(self.scheme()).is_special() { - self.set_host_internal(Host::parse(host_substr)?, None); + self.set_host_internal(Host::parse_cow(host_substr.into())?, None); } else { - self.set_host_internal(Host::parse_opaque(host_substr)?, None); + self.set_host_internal(Host::parse_opaque_cow(host_substr.into())?, None); } } else if self.has_host() { if scheme_type.is_special() && !scheme_type.is_file() { @@ -2070,7 +2071,7 @@ impl Url { } /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. - fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { + fn set_host_internal(&mut self, host: Host>, opt_new_port: Option>) { let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { @@ -2987,7 +2988,7 @@ fn path_to_file_url_segments_windows( serialization.push(':'); } Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => { - let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?; + let host = Host::parse_cow(server.to_str().ok_or(())?.into()).map_err(|_| ())?; write!(serialization, "{}", host).unwrap(); host_end = to_u32(serialization.len()).unwrap(); host_internal = host.into(); diff --git a/url/src/parser.rs b/url/src/parser.rs index 416484f1..4b347feb 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -7,9 +7,9 @@ // except according to those terms. use alloc::string::String; -use alloc::string::ToString; use core::fmt::{self, Formatter, Write}; use core::str; +use std::borrow::Cow; use crate::host::{Host, HostInternal}; use crate::Url; @@ -979,7 +979,7 @@ impl<'a> Parser<'a> { pub fn parse_host( mut input: Input<'_>, scheme_type: SchemeType, - ) -> ParseResult<(Host, Input<'_>)> { + ) -> ParseResult<(Host>, Input<'_>)> { if scheme_type.is_file() { return Parser::get_file_host(input); } @@ -1010,34 +1010,34 @@ impl<'a> Parser<'a> { } bytes += c.len_utf8(); } - let replaced: String; let host_str; { let host_input = input.by_ref().take(non_ignored_chars); if has_ignored_chars { - replaced = host_input.collect(); - host_str = &*replaced + host_str = Cow::Owned(host_input.collect()); } else { for _ in host_input {} - host_str = &input_str[..bytes] + host_str = Cow::Borrowed(&input_str[..bytes]); } } if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() { return Err(ParseError::EmptyHost); } if !scheme_type.is_special() { - let host = Host::parse_opaque(host_str)?; + let host = Host::parse_opaque_cow(host_str)?; return Ok((host, input)); } - let host = Host::parse(host_str)?; + let host = Host::parse_cow(host_str)?; Ok((host, input)) } - fn get_file_host(input: Input<'_>) -> ParseResult<(Host, Input<'_>)> { + fn get_file_host(input: Input<'_>) -> ParseResult<(Host>, Input<'_>)> { let (_, host_str, remaining) = Parser::file_host(input)?; let host = match Host::parse(&host_str)? { - Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()), - host => host, + Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")), + Host::Domain(s) => Host::Domain(Cow::Owned(s)), + Host::Ipv4(ip) => Host::Ipv4(ip), + Host::Ipv6(ip) => Host::Ipv6(ip), }; Ok((host, remaining)) } @@ -1052,7 +1052,7 @@ impl<'a> Parser<'a> { has_host = false; HostInternal::None } else { - match Host::parse(&host_str)? { + match Host::parse_cow(host_str)? { Host::Domain(ref d) if d == "localhost" => { has_host = false; HostInternal::None @@ -1067,7 +1067,7 @@ impl<'a> Parser<'a> { Ok((has_host, host, remaining)) } - pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> { + pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -1082,23 +1082,21 @@ impl<'a> Parser<'a> { } bytes += c.len_utf8(); } - let replaced: String; let host_str; let mut remaining = input.clone(); { let host_input = remaining.by_ref().take(non_ignored_chars); if has_ignored_chars { - replaced = host_input.collect(); - host_str = &*replaced + host_str = Cow::Owned(host_input.collect()); } else { for _ in host_input {} - host_str = &input_str[..bytes] + host_str = Cow::Borrowed(&input_str[..bytes]); } } - if is_windows_drive_letter(host_str) { - return Ok((false, "".to_string(), input)); + if is_windows_drive_letter(&host_str) { + return Ok((false, "".into(), input)); } - Ok((true, host_str.to_string(), remaining)) + Ok((true, host_str, remaining)) } pub fn parse_port

( diff --git a/url/src/quirks.rs b/url/src/quirks.rs index 8626f64c..35beef90 100644 --- a/url/src/quirks.rs +++ b/url/src/quirks.rs @@ -161,7 +161,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { let scheme = url.scheme(); let scheme_type = SchemeType::from(scheme); if scheme_type == SchemeType::File && new_host.is_empty() { - url.set_host_internal(Host::Domain(String::new()), None); + url.set_host_internal(Host::Domain("".into()), None); return Ok(()); } @@ -208,7 +208,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { let input = Input::new_no_trim(new_hostname); let scheme_type = SchemeType::from(url.scheme()); if scheme_type == SchemeType::File && new_hostname.is_empty() { - url.set_host_internal(Host::Domain(String::new()), None); + url.set_host_internal(Host::Domain("".into()), None); return Ok(()); } From 4b12c9bb3a34f2493ec7780c0a3d06b2499e3faf Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 12:53:46 -0500 Subject: [PATCH 2/6] make compile with no_std --- url/src/host.rs | 2 +- url/src/lib.rs | 2 +- url/src/parser.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/url/src/host.rs b/url/src/host.rs index 7699da97..c01702e1 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -176,7 +176,7 @@ fn domain_to_ascii(domain: Cow<'_, [u8]>) -> Result, ParseError> { Ok(match value { Cow::Owned(value) => Cow::Owned(value), Cow::Borrowed(_) => match domain { - Cow::Borrowed(value) => unsafe { Cow::Borrowed(std::str::from_utf8_unchecked(value)) }, + Cow::Borrowed(value) => unsafe { Cow::Borrowed(core::str::from_utf8_unchecked(value)) }, Cow::Owned(value) => unsafe { String::from_utf8_unchecked(value).into() }, }, }) diff --git a/url/src/lib.rs b/url/src/lib.rs index 798536e9..9a13d871 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -175,6 +175,7 @@ use crate::net::IpAddr; use crate::net::{SocketAddr, ToSocketAddrs}; use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO}; use alloc::borrow::ToOwned; +use alloc::borrow::Cow; use alloc::str; use alloc::string::{String, ToString}; use core::borrow::Borrow; @@ -183,7 +184,6 @@ use core::fmt::Write; use core::ops::{Range, RangeFrom, RangeTo}; use core::{cmp, fmt, hash, mem}; use percent_encoding::utf8_percent_encode; -use std::borrow::Cow; #[cfg(feature = "std")] #[cfg(any( unix, diff --git a/url/src/parser.rs b/url/src/parser.rs index 4b347feb..7dc1d82c 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -9,7 +9,7 @@ use alloc::string::String; use core::fmt::{self, Formatter, Write}; use core::str; -use std::borrow::Cow; +use alloc::borrow::Cow; use crate::host::{Host, HostInternal}; use crate::Url; From 639243a3e77a77f261ec2639f32e9e18ff39c1e2 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 13:04:51 -0500 Subject: [PATCH 3/6] more comments --- idna/src/lib.rs | 2 +- url/src/host.rs | 35 ++++++++++++++++++++--------------- url/src/lib.rs | 2 +- url/src/parser.rs | 2 +- 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index ef537117..5705c411 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -99,7 +99,7 @@ impl core::fmt::Display for Errors { /// /// This process may fail. /// -/// If you have a `&str` instead of `&[u8]`, just call `.to_bytes()` on it before +/// If you have a `&str` instead of `&[u8]`, just call `.as_bytes()` on it before /// passing it to this function. It's still preferable to use this function over /// the sibling functions that take `&str`. pub fn domain_to_ascii_cow( diff --git a/url/src/host.rs b/url/src/host.rs index c01702e1..0448dfda 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -74,6 +74,20 @@ impl Host<&str> { } } +impl Host { + /// Parse a host: either an IPv6 address in [] square brackets, or a domain. + /// + /// + pub fn parse(input: &str) -> Result { + Host::>::parse_cow(input.into()).map(|i| i.into_owned()) + } + + /// + pub fn parse_opaque(input: &str) -> Result { + Host::>::parse_opaque_cow(input.into()).map(|i| i.into_owned()) + } +} + impl<'a> Host> { pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result { if input.starts_with('[') { @@ -84,11 +98,12 @@ impl<'a> Host> { } let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into(); let domain: Cow<'a, [u8]> = match domain { + Cow::Owned(v) => Cow::Owned(v), + // if borrowed then we can use the original cow Cow::Borrowed(_) => match input { Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()), Cow::Owned(input) => Cow::Owned(input.into_bytes()), }, - Cow::Owned(v) => Cow::Owned(v), }; let domain = domain_to_ascii(domain)?; @@ -141,6 +156,7 @@ impl<'a> Host> { Ok(Host::Domain( match utf8_percent_encode(&input, CONTROLS).into() { Cow::Owned(v) => Cow::Owned(v), + // if we're borrowing, then we can return the original Cow Cow::Borrowed(_) => input, }, )) @@ -156,25 +172,14 @@ impl<'a> Host> { } } -impl Host { - /// Parse a host: either an IPv6 address in [] square brackets, or a domain. - /// - /// - pub fn parse(input: &str) -> Result { - Host::>::parse_cow(input.into()).map(|i| i.into_owned()) - } - - // - pub fn parse_opaque(input: &str) -> Result { - Host::>::parse_opaque_cow(input.into()).map(|i| i.into_owned()) - } -} - /// convert domain with idna fn domain_to_ascii(domain: Cow<'_, [u8]>) -> Result, ParseError> { let value = idna::domain_to_ascii_cow(&domain, idna::AsciiDenyList::URL)?; + // TODO: would be better to move this into the idna crate Ok(match value { Cow::Owned(value) => Cow::Owned(value), + // SAFETY: If borrowed, then the original string is ascii and we can return the + // original Cow in order to save an allocation Cow::Borrowed(_) => match domain { Cow::Borrowed(value) => unsafe { Cow::Borrowed(core::str::from_utf8_unchecked(value)) }, Cow::Owned(value) => unsafe { String::from_utf8_unchecked(value).into() }, diff --git a/url/src/lib.rs b/url/src/lib.rs index 9a13d871..4f682b6b 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -174,8 +174,8 @@ use crate::net::IpAddr; ))] use crate::net::{SocketAddr, ToSocketAddrs}; use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO}; -use alloc::borrow::ToOwned; use alloc::borrow::Cow; +use alloc::borrow::ToOwned; use alloc::str; use alloc::string::{String, ToString}; use core::borrow::Borrow; diff --git a/url/src/parser.rs b/url/src/parser.rs index 7dc1d82c..c9e1b3d9 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -6,10 +6,10 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use alloc::borrow::Cow; use alloc::string::String; use core::fmt::{self, Formatter, Write}; use core::str; -use alloc::borrow::Cow; use crate::host::{Host, HostInternal}; use crate::Url; From b4d4154f334703c40ebd8cf577c137ecb44779bf Mon Sep 17 00:00:00 2001 From: David Sherret Date: Thu, 6 Feb 2025 13:08:52 -0500 Subject: [PATCH 4/6] add size hint for Iterator --- url/src/parser.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/url/src/parser.rs b/url/src/parser.rs index c9e1b3d9..e375f624 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -325,6 +325,10 @@ impl Iterator for Input<'_> { .by_ref() .find(|&c| !matches!(c, '\t' | '\n' | '\r')) } + + fn size_hint(&self) -> (usize, Option) { + (0, Some(self.chars.as_str().len())) + } } pub struct Parser<'a> { From 362683b359070032e7b8b085bcb4a362f8c43fbb Mon Sep 17 00:00:00 2001 From: David Sherret Date: Fri, 7 Feb 2025 10:22:38 -0500 Subject: [PATCH 5/6] move function down to idna crate --- idna/src/lib.rs | 30 +++++++++++++++++++++++++++--- idna/src/uts46.rs | 19 ++++++++++++++++--- url/src/host.rs | 17 +---------------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 5705c411..18ae3c1d 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -86,9 +86,9 @@ impl core::fmt::Display for Errors { /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; /// version returning a `Cow`. /// -/// Most applications should be using this function rather than the sibling functions, -/// and most applications should pass [`AsciiDenyList::URL`] as the second argument. -/// Passing [`AsciiDenyList::URL`] as the second argument makes this function also +/// Most applications should be using this function or `domain_to_ascii_from_cow` rather +/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as +/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also /// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) /// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) /// algorithm. @@ -114,6 +114,30 @@ pub fn domain_to_ascii_cow( ) } +/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; +/// version accepting and returning a `Cow`. +/// +/// Most applications should be using this function or `domain_to_ascii_cow` rather +/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as +/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also +/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point) +/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) +/// algorithm. +/// +/// Return the ASCII representation a domain name, +/// normalizing characters (upper-case to lower-case and other kinds of equivalence) +/// and using Punycode as necessary. +/// +/// This process may fail. +pub fn domain_to_ascii_from_cow(domain: Cow<'_, [u8]>, ascii_deny_list: AsciiDenyList) -> Result, Errors> { + Uts46::new().to_ascii_from_cow( + domain, + ascii_deny_list, + uts46::Hyphens::Allow, + uts46::DnsLength::Ignore, + ) +} + /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm; /// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_). /// diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index fa5ea58a..9b24d0a1 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -530,10 +530,20 @@ impl Uts46 { ascii_deny_list: AsciiDenyList, hyphens: Hyphens, dns_length: DnsLength, + ) -> Result, crate::Errors> { + self.to_ascii_from_cow(Cow::Borrowed(domain_name), ascii_deny_list, hyphens, dns_length) + } + + pub(crate) fn to_ascii_from_cow<'a>( + &self, + domain_name: Cow<'a, [u8]>, + ascii_deny_list: AsciiDenyList, + hyphens: Hyphens, + dns_length: DnsLength, ) -> Result, crate::Errors> { let mut s = String::new(); match self.process( - domain_name, + &domain_name, ascii_deny_list, hyphens, ErrorPolicy::FailFast, @@ -541,9 +551,12 @@ impl Uts46 { &mut s, None, ) { - // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII. Ok(ProcessingSuccess::Passthrough) => { - let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) }); + // SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII. + let cow = match domain_name { + Cow::Borrowed(v) => Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(v) }), + Cow::Owned(v) => Cow::Owned(unsafe { String::from_utf8_unchecked(v) }), + }; if dns_length != DnsLength::Ignore && !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot) { diff --git a/url/src/host.rs b/url/src/host.rs index 0448dfda..7443412e 100644 --- a/url/src/host.rs +++ b/url/src/host.rs @@ -106,7 +106,7 @@ impl<'a> Host> { }, }; - let domain = domain_to_ascii(domain)?; + let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?; if domain.is_empty() { return Err(ParseError::EmptyHost); @@ -172,21 +172,6 @@ impl<'a> Host> { } } -/// convert domain with idna -fn domain_to_ascii(domain: Cow<'_, [u8]>) -> Result, ParseError> { - let value = idna::domain_to_ascii_cow(&domain, idna::AsciiDenyList::URL)?; - // TODO: would be better to move this into the idna crate - Ok(match value { - Cow::Owned(value) => Cow::Owned(value), - // SAFETY: If borrowed, then the original string is ascii and we can return the - // original Cow in order to save an allocation - Cow::Borrowed(_) => match domain { - Cow::Borrowed(value) => unsafe { Cow::Borrowed(core::str::from_utf8_unchecked(value)) }, - Cow::Owned(value) => unsafe { String::from_utf8_unchecked(value).into() }, - }, - }) -} - impl> fmt::Display for Host { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { From a18ac4cc971668a193b53a4959bd99625b960901 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Fri, 7 Feb 2025 10:23:22 -0500 Subject: [PATCH 6/6] format --- idna/src/lib.rs | 5 ++++- idna/src/uts46.rs | 7 ++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/idna/src/lib.rs b/idna/src/lib.rs index 18ae3c1d..81f315a0 100644 --- a/idna/src/lib.rs +++ b/idna/src/lib.rs @@ -129,7 +129,10 @@ pub fn domain_to_ascii_cow( /// and using Punycode as necessary. /// /// This process may fail. -pub fn domain_to_ascii_from_cow(domain: Cow<'_, [u8]>, ascii_deny_list: AsciiDenyList) -> Result, Errors> { +pub fn domain_to_ascii_from_cow( + domain: Cow<'_, [u8]>, + ascii_deny_list: AsciiDenyList, +) -> Result, Errors> { Uts46::new().to_ascii_from_cow( domain, ascii_deny_list, diff --git a/idna/src/uts46.rs b/idna/src/uts46.rs index 9b24d0a1..9a450fa0 100644 --- a/idna/src/uts46.rs +++ b/idna/src/uts46.rs @@ -531,7 +531,12 @@ impl Uts46 { hyphens: Hyphens, dns_length: DnsLength, ) -> Result, crate::Errors> { - self.to_ascii_from_cow(Cow::Borrowed(domain_name), ascii_deny_list, hyphens, dns_length) + self.to_ascii_from_cow( + Cow::Borrowed(domain_name), + ascii_deny_list, + hyphens, + dns_length, + ) } pub(crate) fn to_ascii_from_cow<'a>(