Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: remove heap allocation in parse_host #1021

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions idna/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ impl core::fmt::Display for Errors {
/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
/// version returning a `Cow`.
///
/// Most applications should be using this function rather than the sibling functions,
/// and most applications should pass [`AsciiDenyList::URL`] as the second argument.
/// Passing [`AsciiDenyList::URL`] as the second argument makes this function also
/// Most applications should be using this function or `domain_to_ascii_from_cow` rather
/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as
/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also
/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii)
/// algorithm.
Expand All @@ -99,7 +99,7 @@ impl core::fmt::Display for Errors {
///
/// This process may fail.
///
/// If you have a `&str` instead of `&[u8]`, just call `.to_bytes()` on it before
/// If you have a `&str` instead of `&[u8]`, just call `.as_bytes()` on it before
/// passing it to this function. It's still preferable to use this function over
/// the sibling functions that take `&str`.
pub fn domain_to_ascii_cow(
Expand All @@ -114,6 +114,33 @@ pub fn domain_to_ascii_cow(
)
}

/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
/// version accepting and returning a `Cow`.
///
/// Most applications should be using this function or `domain_to_ascii_cow` rather
/// than the sibling functions, and most applications should pass [`AsciiDenyList::URL`] as
/// the second argument. Passing [`AsciiDenyList::URL`] as the second argument makes this function also
/// perform the [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point)
/// check in addition to the [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii)
/// algorithm.
///
/// Return the ASCII representation a domain name,
/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
/// and using Punycode as necessary.
///
/// This process may fail.
pub fn domain_to_ascii_from_cow(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hsivonen Please check if you find this new public API of idna to be acceptable.

domain: Cow<'_, [u8]>,
ascii_deny_list: AsciiDenyList,
) -> Result<Cow<'_, str>, Errors> {
Uts46::new().to_ascii_from_cow(
domain,
ascii_deny_list,
uts46::Hyphens::Allow,
uts46::DnsLength::Ignore,
)
}

/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm;
/// version returning `String` and no ASCII deny list (i.e. _UseSTD3ASCIIRules=false_).
///
Expand Down
24 changes: 21 additions & 3 deletions idna/src/uts46.rs
Original file line number Diff line number Diff line change
Expand Up @@ -530,20 +530,38 @@
ascii_deny_list: AsciiDenyList,
hyphens: Hyphens,
dns_length: DnsLength,
) -> Result<Cow<'a, str>, crate::Errors> {
self.to_ascii_from_cow(
Cow::Borrowed(domain_name),
ascii_deny_list,
hyphens,
dns_length,

Check warning on line 538 in idna/src/uts46.rs

View check run for this annotation

Codecov / codecov/patch

idna/src/uts46.rs#L536-L538

Added lines #L536 - L538 were not covered by tests
)
}

pub(crate) fn to_ascii_from_cow<'a>(
&self,
domain_name: Cow<'a, [u8]>,
ascii_deny_list: AsciiDenyList,
hyphens: Hyphens,
dns_length: DnsLength,
) -> Result<Cow<'a, str>, crate::Errors> {
let mut s = String::new();
match self.process(
domain_name,
&domain_name,
ascii_deny_list,
hyphens,
ErrorPolicy::FailFast,
|_, _, _| false,
&mut s,
None,
) {
// SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
Ok(ProcessingSuccess::Passthrough) => {
let cow = Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(domain_name) });
// SAFETY: `ProcessingSuccess::Passthrough` asserts that `domain_name` is ASCII.
let cow = match domain_name {
Cow::Borrowed(v) => Cow::Borrowed(unsafe { core::str::from_utf8_unchecked(v) }),
Cow::Owned(v) => Cow::Owned(unsafe { String::from_utf8_unchecked(v) }),
};
if dns_length != DnsLength::Ignore
&& !verify_dns_length(&cow, dns_length == DnsLength::VerifyAllowRootDot)
{
Expand Down
46 changes: 35 additions & 11 deletions url/src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::string::String;
use alloc::string::ToString;
use alloc::vec::Vec;
use core::cmp;
use core::fmt::{self, Formatter};
Expand All @@ -30,8 +29,8 @@
Ipv6(Ipv6Addr),
}

impl From<Host<String>> for HostInternal {
fn from(host: Host<String>) -> HostInternal {
impl From<Host<Cow<'_, str>>> for HostInternal {
fn from(host: Host<Cow<'_, str>>) -> HostInternal {
match host {
Host::Domain(ref s) if s.is_empty() => HostInternal::None,
Host::Domain(_) => HostInternal::Domain,
Expand Down Expand Up @@ -80,15 +79,34 @@
///
/// <https://url.spec.whatwg.org/#host-parsing>
pub fn parse(input: &str) -> Result<Self, ParseError> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't add, change, or remove from the public API, but please verify this as well in case I missed something.

Host::<Cow<str>>::parse_cow(input.into()).map(|i| i.into_owned())
}

/// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
Host::<Cow<str>>::parse_opaque_cow(input.into()).map(|i| i.into_owned())

Check warning on line 87 in url/src/host.rs

View check run for this annotation

Codecov / codecov/patch

url/src/host.rs#L86-L87

Added lines #L86 - L87 were not covered by tests
}
}

impl<'a> Host<Cow<'a, str>> {
pub(crate) fn parse_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(ParseError::InvalidIpv6Address);
}
return parse_ipv6addr(&input[1..input.len() - 1]).map(Host::Ipv6);
}
let domain: Cow<'_, [u8]> = percent_decode(input.as_bytes()).into();
let domain: Cow<'a, [u8]> = match domain {
Cow::Owned(v) => Cow::Owned(v),
// if borrowed then we can use the original cow
Cow::Borrowed(_) => match input {
Cow::Borrowed(input) => Cow::Borrowed(input.as_bytes()),
Cow::Owned(input) => Cow::Owned(input.into_bytes()),
},
};

let domain = Self::domain_to_ascii(&domain)?;
let domain = idna::domain_to_ascii_from_cow(domain, idna::AsciiDenyList::URL)?;

if domain.is_empty() {
return Err(ParseError::EmptyHost);
Expand All @@ -98,12 +116,11 @@
let address = parse_ipv4addr(&domain)?;
Ok(Host::Ipv4(address))
} else {
Ok(Host::Domain(domain.to_string()))
Ok(Host::Domain(domain))
}
}

// <https://url.spec.whatwg.org/#concept-opaque-host-parser>
pub fn parse_opaque(input: &str) -> Result<Self, ParseError> {
pub(crate) fn parse_opaque_cow(input: Cow<'a, str>) -> Result<Self, ParseError> {
if input.starts_with('[') {
if !input.ends_with(']') {
return Err(ParseError::InvalidIpv6Address);
Expand Down Expand Up @@ -137,14 +154,21 @@
Err(ParseError::InvalidDomainCharacter)
} else {
Ok(Host::Domain(
utf8_percent_encode(input, CONTROLS).to_string(),
match utf8_percent_encode(&input, CONTROLS).into() {
Cow::Owned(v) => Cow::Owned(v),
// if we're borrowing, then we can return the original Cow
Cow::Borrowed(_) => input,
},
))
}
}

/// convert domain with idna
fn domain_to_ascii(domain: &[u8]) -> Result<Cow<'_, str>, ParseError> {
idna::domain_to_ascii_cow(domain, idna::AsciiDenyList::URL).map_err(Into::into)
pub(crate) fn into_owned(self) -> Host<String> {
match self {
Host::Domain(s) => Host::Domain(s.into_owned()),
Host::Ipv4(ip) => Host::Ipv4(ip),
Host::Ipv6(ip) => Host::Ipv6(ip),

Check warning on line 170 in url/src/host.rs

View check run for this annotation

Codecov / codecov/patch

url/src/host.rs#L169-L170

Added lines #L169 - L170 were not covered by tests
}
}
}

Expand Down
9 changes: 5 additions & 4 deletions url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
))]
use crate::net::{SocketAddr, ToSocketAddrs};
use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
use alloc::borrow::Cow;
use alloc::borrow::ToOwned;
use alloc::str;
use alloc::string::{String, ToString};
Expand Down Expand Up @@ -2032,9 +2033,9 @@
}
}
if SchemeType::from(self.scheme()).is_special() {
self.set_host_internal(Host::parse(host_substr)?, None);
self.set_host_internal(Host::parse_cow(host_substr.into())?, None);
} else {
self.set_host_internal(Host::parse_opaque(host_substr)?, None);
self.set_host_internal(Host::parse_opaque_cow(host_substr.into())?, None);
}
} else if self.has_host() {
if scheme_type.is_special() && !scheme_type.is_file() {
Expand Down Expand Up @@ -2070,7 +2071,7 @@
}

/// opt_new_port: None means leave unchanged, Some(None) means remove any port number.
fn set_host_internal(&mut self, host: Host<String>, opt_new_port: Option<Option<u16>>) {
fn set_host_internal(&mut self, host: Host<Cow<'_, str>>, opt_new_port: Option<Option<u16>>) {
let old_suffix_pos = if opt_new_port.is_some() {
self.path_start
} else {
Expand Down Expand Up @@ -2987,7 +2988,7 @@
serialization.push(':');
}
Prefix::UNC(server, share) | Prefix::VerbatimUNC(server, share) => {
let host = Host::parse(server.to_str().ok_or(())?).map_err(|_| ())?;
let host = Host::parse_cow(server.to_str().ok_or(())?.into()).map_err(|_| ())?;

Check warning on line 2991 in url/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

url/src/lib.rs#L2991

Added line #L2991 was not covered by tests
write!(serialization, "{}", host).unwrap();
host_end = to_u32(serialization.len()).unwrap();
host_internal = host.into();
Expand Down
42 changes: 22 additions & 20 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use alloc::borrow::Cow;
use alloc::string::String;
use alloc::string::ToString;
use core::fmt::{self, Formatter, Write};
use core::str;

Expand Down Expand Up @@ -329,6 +329,10 @@
fn next(&mut self) -> Option<char> {
self.chars.by_ref().find(|&c| !ascii_tab_or_new_line(c))
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.chars.as_str().len()))
}
}

pub struct Parser<'a> {
Expand Down Expand Up @@ -987,7 +991,7 @@
pub fn parse_host(
mut input: Input<'_>,
scheme_type: SchemeType,
) -> ParseResult<(Host<String>, Input<'_>)> {
) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
if scheme_type.is_file() {
return Parser::get_file_host(input);
}
Expand Down Expand Up @@ -1018,34 +1022,34 @@
}
bytes += c.len_utf8();
}
let replaced: String;
let host_str;
{
let host_input = input.by_ref().take(non_ignored_chars);
if has_ignored_chars {
replaced = host_input.collect();
host_str = &*replaced
host_str = Cow::Owned(host_input.collect());
} else {
for _ in host_input {}
host_str = &input_str[..bytes]
host_str = Cow::Borrowed(&input_str[..bytes]);
}
}
if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() {
return Err(ParseError::EmptyHost);
}
if !scheme_type.is_special() {
let host = Host::parse_opaque(host_str)?;
let host = Host::parse_opaque_cow(host_str)?;
return Ok((host, input));
}
let host = Host::parse(host_str)?;
let host = Host::parse_cow(host_str)?;
Ok((host, input))
}

fn get_file_host(input: Input<'_>) -> ParseResult<(Host<String>, Input<'_>)> {
fn get_file_host(input: Input<'_>) -> ParseResult<(Host<Cow<'_, str>>, Input<'_>)> {
let (_, host_str, remaining) = Parser::file_host(input)?;
let host = match Host::parse(&host_str)? {
Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()),
host => host,
Host::Domain(ref d) if d == "localhost" => Host::Domain(Cow::Borrowed("")),
Host::Domain(s) => Host::Domain(Cow::Owned(s)),
Host::Ipv4(ip) => Host::Ipv4(ip),
Host::Ipv6(ip) => Host::Ipv6(ip),

Check warning on line 1052 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1050-L1052

Added lines #L1050 - L1052 were not covered by tests
};
Ok((host, remaining))
}
Expand All @@ -1060,7 +1064,7 @@
has_host = false;
HostInternal::None
} else {
match Host::parse(&host_str)? {
match Host::parse_cow(host_str)? {
Host::Domain(ref d) if d == "localhost" => {
has_host = false;
HostInternal::None
Expand All @@ -1075,7 +1079,7 @@
Ok((has_host, host, remaining))
}

pub fn file_host(input: Input) -> ParseResult<(bool, String, Input)> {
pub fn file_host(input: Input<'_>) -> ParseResult<(bool, Cow<'_, str>, Input<'_>)> {
// Undo the Input abstraction here to avoid allocating in the common case
// where the host part of the input does not contain any tab or newline
let input_str = input.chars.as_str();
Expand All @@ -1090,23 +1094,21 @@
}
bytes += c.len_utf8();
}
let replaced: String;
let host_str;
let mut remaining = input.clone();
{
let host_input = remaining.by_ref().take(non_ignored_chars);
if has_ignored_chars {
replaced = host_input.collect();
host_str = &*replaced
host_str = Cow::Owned(host_input.collect());

Check warning on line 1102 in url/src/parser.rs

View check run for this annotation

Codecov / codecov/patch

url/src/parser.rs#L1102

Added line #L1102 was not covered by tests
} else {
for _ in host_input {}
host_str = &input_str[..bytes]
host_str = Cow::Borrowed(&input_str[..bytes]);
}
}
if is_windows_drive_letter(host_str) {
return Ok((false, "".to_string(), input));
if is_windows_drive_letter(&host_str) {
return Ok((false, "".into(), input));
}
Ok((true, host_str.to_string(), remaining))
Ok((true, host_str, remaining))
}

pub fn parse_port<P>(
Expand Down
4 changes: 2 additions & 2 deletions url/src/quirks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> {
let scheme = url.scheme();
let scheme_type = SchemeType::from(scheme);
if scheme_type == SchemeType::File && new_host.is_empty() {
url.set_host_internal(Host::Domain(String::new()), None);
url.set_host_internal(Host::Domain("".into()), None);
return Ok(());
}

Expand Down Expand Up @@ -208,7 +208,7 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> {
let input = Input::new_no_trim(new_hostname);
let scheme_type = SchemeType::from(url.scheme());
if scheme_type == SchemeType::File && new_hostname.is_empty() {
url.set_host_internal(Host::Domain(String::new()), None);
url.set_host_internal(Host::Domain("".into()), None);
return Ok(());
}

Expand Down
Loading