From a4a585da2a98f94407e602fc2328d5e4207185f3 Mon Sep 17 00:00:00 2001 From: Aleksei Latyshev Date: Thu, 26 Sep 2024 20:05:55 +0200 Subject: [PATCH 1/3] [`refurb`] implement hardcoded-string-charset (FURB156) --- .../resources/test/fixtures/refurb/FURB156.py | 20 ++ .../src/checkers/ast/analyze/expression.rs | 8 +- crates/ruff_linter/src/codes.rs | 1 + crates/ruff_linter/src/rules/refurb/mod.rs | 1 + .../refurb/rules/hardcoded_string_charset.rs | 184 ++++++++++++ .../ruff_linter/src/rules/refurb/rules/mod.rs | 2 + ...es__refurb__tests__FURB156_FURB156.py.snap | 274 ++++++++++++++++++ ruff.schema.json | 1 + 8 files changed, 490 insertions(+), 1 deletion(-) create mode 100644 crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py create mode 100644 crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs create mode 100644 crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py new file mode 100644 index 0000000000000..5673e9f506bb2 --- /dev/null +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py @@ -0,0 +1,20 @@ +# Errors + +_ = "0123456789" +_ = "01234567" +_ = "0123456789abcdefABCDEF" +_ = "abcdefghijklmnopqrstuvwxyz" +_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +_ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +_ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +_ = " \t\n\r\v\f" + +_ = "" in "1234567890" +_ = "" in "12345670" +_ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' + +# Ok + +_ = "1234567890" +_ = "1234" +_ = "" in "1234" diff --git a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs index 64455275cca72..99deb3b994998 100644 --- a/crates/ruff_linter/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff_linter/src/checkers/ast/analyze/expression.rs @@ -1355,6 +1355,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::SingleItemMembershipTest) { refurb::rules::single_item_membership_test(checker, expr, left, ops, comparators); } + if checker.enabled(Rule::HardcodedStringCharset) { + refurb::rules::hardcoded_string_charset_comparison(checker, compare); + } } Expr::NumberLiteral(number_literal @ ast::ExprNumberLiteral { .. }) => { if checker.source_type.is_stub() && checker.enabled(Rule::NumericLiteralTooLong) { @@ -1364,7 +1367,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { refurb::rules::math_constant(checker, number_literal); } } - Expr::StringLiteral(ast::ExprStringLiteral { value, range: _ }) => { + Expr::StringLiteral(string_like @ ast::ExprStringLiteral { value, range: _ }) => { if checker.enabled(Rule::UnicodeKindPrefix) { for string_part in value { pyupgrade::rules::unicode_kind_prefix(checker, string_part); @@ -1375,6 +1378,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { ruff::rules::missing_fstring_syntax(checker, string_literal); } } + if checker.enabled(Rule::HardcodedStringCharset) { + refurb::rules::hardcoded_string_charset_literal(checker, string_like); + } } Expr::If( if_exp @ ast::ExprIf { diff --git a/crates/ruff_linter/src/codes.rs b/crates/ruff_linter/src/codes.rs index 21486729a5770..41e4482e42727 100644 --- a/crates/ruff_linter/src/codes.rs +++ b/crates/ruff_linter/src/codes.rs @@ -1055,6 +1055,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Refurb, "148") => (RuleGroup::Preview, rules::refurb::rules::UnnecessaryEnumerate), (Refurb, "152") => (RuleGroup::Preview, rules::refurb::rules::MathConstant), (Refurb, "154") => (RuleGroup::Preview, rules::refurb::rules::RepeatedGlobal), + (Refurb, "156") => (RuleGroup::Preview, rules::refurb::rules::HardcodedStringCharset), (Refurb, "157") => (RuleGroup::Preview, rules::refurb::rules::VerboseDecimalConstructor), (Refurb, "161") => (RuleGroup::Stable, rules::refurb::rules::BitCount), (Refurb, "163") => (RuleGroup::Stable, rules::refurb::rules::RedundantLogBase), diff --git a/crates/ruff_linter/src/rules/refurb/mod.rs b/crates/ruff_linter/src/rules/refurb/mod.rs index 6f438ba632e6f..640773c3e72fb 100644 --- a/crates/ruff_linter/src/rules/refurb/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/mod.rs @@ -29,6 +29,7 @@ mod tests { #[test_case(Rule::UnnecessaryEnumerate, Path::new("FURB148.py"))] #[test_case(Rule::MathConstant, Path::new("FURB152.py"))] #[test_case(Rule::RepeatedGlobal, Path::new("FURB154.py"))] + #[test_case(Rule::HardcodedStringCharset, Path::new("FURB156.py"))] #[test_case(Rule::VerboseDecimalConstructor, Path::new("FURB157.py"))] #[test_case(Rule::UnnecessaryFromFloat, Path::new("FURB164.py"))] #[test_case(Rule::PrintEmptyString, Path::new("FURB105.py"))] diff --git a/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs b/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs new file mode 100644 index 0000000000000..b9f37e0c662e9 --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs @@ -0,0 +1,184 @@ +use crate::checkers::ast::Checker; +use crate::importer::ImportRequest; +use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{CmpOp, Expr, ExprCompare, ExprStringLiteral}; +use ruff_text_size::{Ranged, TextRange}; + +/// ## What it does +/// Checks for uses of hardcoded charsets, which are defined in Python string module. +/// +/// ## Why is this bad? +/// Usage of named charsets from the standard library is more readable and less error-prone. +/// +/// ## Example +/// ```python +/// x = "0123456789" +/// y in "abcdefghijklmnopqrstuvwxyz" +/// ``` +/// +/// Use instead +/// ```python +/// import string +/// +/// x = string.digits +/// y in string.ascii_lowercase +/// ``` +/// +/// ## References +/// - [Python documentation: String constants](https://docs.python.org/3/library/string.html#string-constants) +#[violation] +pub struct HardcodedStringCharset { + name: &'static str, +} + +impl AlwaysFixableViolation for HardcodedStringCharset { + #[derive_message_formats] + fn message(&self) -> String { + format!("Use of hardcoded string charset") + } + + fn fix_title(&self) -> String { + let HardcodedStringCharset { name } = self; + format!("Replace hardcoded charset with `string.{name}`") + } +} + +struct NamedCharset { + name: &'static str, + bytes: &'static [u8], + ascii_char_set: AsciiCharSet, +} + +/// Represents the set of ascii characters in form of a bitset. +#[derive(Copy, Clone, Eq, PartialEq)] +struct AsciiCharSet(u128); + +impl AsciiCharSet { + /// Creates the set of ascii characters from `bytes`. + /// Returns None if there is non-ascii byte. + fn from_bytes(bytes: &[u8]) -> Option { + bytes + .iter() + .try_fold(0, |acc, &byte| byte.is_ascii().then(|| acc | (1 << byte))) + .map(Self) + } + + /// Creates the set of ascii characters from `bytes`. + /// Returns None if there is non-ascii byte. + const fn from_bytes_const(bytes: &[u8]) -> Option { + // TODO: remove in favor of [`Self::from_bytes`], when its implementation will be + // supported in `const` fn + // - https://github.com/rust-lang/rust/issues/67792 + // - https://github.com/rust-lang/rust/issues/87575 + let mut bitset = 0; + let mut i = 0; + while i < bytes.len() { + if !bytes[i].is_ascii() { + return None; + } + bitset |= 1 << bytes[i]; + i += 1; + } + Some(Self(bitset)) + } + + const fn from_bytes_const_unwrap(bytes: &[u8]) -> Self { + // TODO: replace with .unwrap() in the caller, when Option::unwrap will be stable in `const fn` + // - https://github.com/rust-lang/rust/issues/67441) + match Self::from_bytes_const(bytes) { + Some(res) => res, + None => unreachable!(), + } + } +} + +impl NamedCharset { + const fn new(name: &'static str, bytes: &'static [u8]) -> Self { + Self { + name, + bytes, + // SAFETY: The named charset is guaranteed to have only ascii bytes. + ascii_char_set: AsciiCharSet::from_bytes_const_unwrap(bytes), + } + } +} + +const KNOWN_NAMED_CHARSETS: [NamedCharset; 9] = [ + NamedCharset::new( + "ascii_letters", + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + ), + NamedCharset::new("ascii_lowercase", b"abcdefghijklmnopqrstuvwxyz"), + NamedCharset::new("ascii_uppercase", b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"), + NamedCharset::new("digits", b"0123456789"), + NamedCharset::new("hexdigits", b"0123456789abcdefABCDEF"), + NamedCharset::new("octdigits", b"01234567"), + NamedCharset::new("punctuation", b"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"), + NamedCharset::new( + "printable", + b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"\ + #$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c", + ), + NamedCharset::new("whitespace", b" \t\n\r\x0b\x0c"), +]; + +fn check_charset_as_set(bytes: &[u8]) -> Option<&NamedCharset> { + let ascii_char_set = AsciiCharSet::from_bytes(bytes)?; + + KNOWN_NAMED_CHARSETS + .iter() + .find(|&charset| charset.ascii_char_set == ascii_char_set) +} + +fn check_charset_exact(bytes: &[u8]) -> Option<&NamedCharset> { + KNOWN_NAMED_CHARSETS + .iter() + .find(|&charset| charset.bytes == bytes) +} + +fn push_diagnostic(checker: &mut Checker, range: TextRange, charset: &NamedCharset) { + let name = charset.name; + let mut diagnostic = Diagnostic::new(HardcodedStringCharset { name }, range); + diagnostic.try_set_fix(|| { + let (edit, binding) = checker.importer().get_or_import_symbol( + &ImportRequest::import("string", name), + range.start(), + checker.semantic(), + )?; + Ok(Fix::safe_edits( + Edit::range_replacement(binding, range), + [edit], + )) + }); + checker.diagnostics.push(diagnostic); +} + +/// FURB156 +pub(crate) fn hardcoded_string_charset_comparison(checker: &mut Checker, compare: &ExprCompare) { + let ([CmpOp::In | CmpOp::NotIn], [Expr::StringLiteral(ExprStringLiteral { value, range })]) = + (compare.ops.as_ref(), compare.comparators.as_ref()) + else { + return; + }; + + let bytes = value.to_str().as_bytes(); + + let Some(charset) = check_charset_as_set(bytes) else { + return; + }; + + // In this case the diagnostic will be emitted via string_literal check. + if charset.bytes == bytes { + return; + } + + push_diagnostic(checker, *range, charset); +} + +/// FURB156 +pub(crate) fn hardcoded_string_charset_literal(checker: &mut Checker, expr: &ExprStringLiteral) { + if let Some(charset) = check_charset_exact(expr.value.to_str().as_bytes()) { + push_diagnostic(checker, expr.range(), charset); + } +} diff --git a/crates/ruff_linter/src/rules/refurb/rules/mod.rs b/crates/ruff_linter/src/rules/refurb/rules/mod.rs index aceaba6d2445c..e71ded4315d23 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/mod.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/mod.rs @@ -3,6 +3,7 @@ pub(crate) use check_and_remove_from_set::*; pub(crate) use delete_full_slice::*; pub(crate) use for_loop_set_mutations::*; pub(crate) use fstring_number_format::*; +pub(crate) use hardcoded_string_charset::*; pub(crate) use hashlib_digest_hex::*; pub(crate) use if_exp_instead_of_or_operator::*; pub(crate) use if_expr_min_max::*; @@ -36,6 +37,7 @@ mod check_and_remove_from_set; mod delete_full_slice; mod for_loop_set_mutations; mod fstring_number_format; +mod hardcoded_string_charset; mod hashlib_digest_hex; mod if_exp_instead_of_or_operator; mod if_expr_min_max; diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap new file mode 100644 index 0000000000000..97ae575aadee1 --- /dev/null +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap @@ -0,0 +1,274 @@ +--- +source: crates/ruff_linter/src/rules/refurb/mod.rs +--- +FURB156.py:3:5: FURB156 [*] Use of hardcoded string charset + | +1 | # Errors +2 | +3 | _ = "0123456789" + | ^^^^^^^^^^^^ FURB156 +4 | _ = "01234567" +5 | _ = "0123456789abcdefABCDEF" + | + = help: Replace hardcoded charset with `string.digits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 |-_ = "0123456789" + 4 |+_ = string.digits +4 5 | _ = "01234567" +5 6 | _ = "0123456789abcdefABCDEF" +6 7 | _ = "abcdefghijklmnopqrstuvwxyz" + +FURB156.py:4:5: FURB156 [*] Use of hardcoded string charset + | +3 | _ = "0123456789" +4 | _ = "01234567" + | ^^^^^^^^^^ FURB156 +5 | _ = "0123456789abcdefABCDEF" +6 | _ = "abcdefghijklmnopqrstuvwxyz" + | + = help: Replace hardcoded charset with `string.octdigits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 |-_ = "01234567" + 5 |+_ = string.octdigits +5 6 | _ = "0123456789abcdefABCDEF" +6 7 | _ = "abcdefghijklmnopqrstuvwxyz" +7 8 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + +FURB156.py:5:5: FURB156 [*] Use of hardcoded string charset + | +3 | _ = "0123456789" +4 | _ = "01234567" +5 | _ = "0123456789abcdefABCDEF" + | ^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 +6 | _ = "abcdefghijklmnopqrstuvwxyz" +7 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + | + = help: Replace hardcoded charset with `string.hexdigits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +5 |-_ = "0123456789abcdefABCDEF" + 6 |+_ = string.hexdigits +6 7 | _ = "abcdefghijklmnopqrstuvwxyz" +7 8 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +8 9 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + +FURB156.py:6:5: FURB156 [*] Use of hardcoded string charset + | +4 | _ = "01234567" +5 | _ = "0123456789abcdefABCDEF" +6 | _ = "abcdefghijklmnopqrstuvwxyz" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 +7 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +8 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + | + = help: Replace hardcoded charset with `string.ascii_lowercase` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +5 6 | _ = "0123456789abcdefABCDEF" +6 |-_ = "abcdefghijklmnopqrstuvwxyz" + 7 |+_ = string.ascii_lowercase +7 8 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +8 9 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +9 10 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" + +FURB156.py:7:5: FURB156 [*] Use of hardcoded string charset + | +5 | _ = "0123456789abcdefABCDEF" +6 | _ = "abcdefghijklmnopqrstuvwxyz" +7 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 +8 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +9 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" + | + = help: Replace hardcoded charset with `string.ascii_uppercase` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +5 6 | _ = "0123456789abcdefABCDEF" +6 7 | _ = "abcdefghijklmnopqrstuvwxyz" +7 |-_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 8 |+_ = string.ascii_uppercase +8 9 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +9 10 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +10 11 | _ = " \t\n\r\v\f" + +FURB156.py:8:5: FURB156 [*] Use of hardcoded string charset + | + 6 | _ = "abcdefghijklmnopqrstuvwxyz" + 7 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 8 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 + 9 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +10 | _ = " \t\n\r\v\f" + | + = help: Replace hardcoded charset with `string.ascii_letters` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +5 6 | _ = "0123456789abcdefABCDEF" +6 7 | _ = "abcdefghijklmnopqrstuvwxyz" +7 8 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +8 |-_ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + 9 |+_ = string.ascii_letters +9 10 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +10 11 | _ = " \t\n\r\v\f" +11 12 | + +FURB156.py:9:5: FURB156 [*] Use of hardcoded string charset + | + 7 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + 8 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + 9 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 +10 | _ = " \t\n\r\v\f" + | + = help: Replace hardcoded charset with `string.punctuation` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +6 7 | _ = "abcdefghijklmnopqrstuvwxyz" +7 8 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +8 9 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +9 |-_ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" + 10 |+_ = string.punctuation +10 11 | _ = " \t\n\r\v\f" +11 12 | +12 13 | _ = "" in "1234567890" + +FURB156.py:10:5: FURB156 [*] Use of hardcoded string charset + | + 8 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + 9 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +10 | _ = " \t\n\r\v\f" + | ^^^^^^^^^^^^^ FURB156 +11 | +12 | _ = "" in "1234567890" + | + = help: Replace hardcoded charset with `string.whitespace` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +7 8 | _ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" +8 9 | _ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +9 10 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +10 |-_ = " \t\n\r\v\f" + 11 |+_ = string.whitespace +11 12 | +12 13 | _ = "" in "1234567890" +13 14 | _ = "" in "12345670" + +FURB156.py:12:11: FURB156 [*] Use of hardcoded string charset + | +10 | _ = " \t\n\r\v\f" +11 | +12 | _ = "" in "1234567890" + | ^^^^^^^^^^^^ FURB156 +13 | _ = "" in "12345670" +14 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' + | + = help: Replace hardcoded charset with `string.digits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +9 10 | _ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~""" +10 11 | _ = " \t\n\r\v\f" +11 12 | +12 |-_ = "" in "1234567890" + 13 |+_ = "" in string.digits +13 14 | _ = "" in "12345670" +14 15 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' +15 16 | + +FURB156.py:13:11: FURB156 [*] Use of hardcoded string charset + | +12 | _ = "" in "1234567890" +13 | _ = "" in "12345670" + | ^^^^^^^^^^ FURB156 +14 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' + | + = help: Replace hardcoded charset with `string.octdigits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +10 11 | _ = " \t\n\r\v\f" +11 12 | +12 13 | _ = "" in "1234567890" +13 |-_ = "" in "12345670" + 14 |+_ = "" in string.octdigits +14 15 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' +15 16 | +16 17 | # Ok + +FURB156.py:14:5: FURB156 [*] Use of hardcoded string charset + | +12 | _ = "" in "1234567890" +13 | _ = "" in "12345670" +14 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 +15 | +16 | # Ok + | + = help: Replace hardcoded charset with `string.printable` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +11 12 | +12 13 | _ = "" in "1234567890" +13 14 | _ = "" in "12345670" +14 |-_ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' + 15 |+_ = string.printable +15 16 | +16 17 | # Ok +17 18 | diff --git a/ruff.schema.json b/ruff.schema.json index 259dff791af34..e738d7336c25b 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -3248,6 +3248,7 @@ "FURB15", "FURB152", "FURB154", + "FURB156", "FURB157", "FURB16", "FURB161", From 066e7006e4b5fad341fa8c84a85ec1f5cf5d3602 Mon Sep 17 00:00:00 2001 From: Aleksei Latyshev Date: Sun, 29 Sep 2024 18:11:04 +0200 Subject: [PATCH 2/3] [`refurb`] FURB156: add test for `parenthesized_range` --- .../resources/test/fixtures/refurb/FURB156.py | 11 ++ .../refurb/rules/hardcoded_string_charset.rs | 40 +++++- ...es__refurb__tests__FURB156_FURB156.py.snap | 117 ++++++++++++++++-- 3 files changed, 154 insertions(+), 14 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py b/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py index 5673e9f506bb2..db7fe0c455499 100644 --- a/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py +++ b/crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py @@ -12,6 +12,17 @@ _ = "" in "1234567890" _ = "" in "12345670" _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' +_ = ( + '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' + "'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" +) +_ = id("0123" + "4567" + "89") +_ = "" in ("123" + "456" + "789" + "0") # Ok diff --git a/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs b/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs index b9f37e0c662e9..219dcd818be19 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs @@ -2,8 +2,9 @@ use crate::checkers::ast::Checker; use crate::importer::ImportRequest; use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; -use ruff_python_ast::{CmpOp, Expr, ExprCompare, ExprStringLiteral}; -use ruff_text_size::{Ranged, TextRange}; +use ruff_python_ast::parenthesize::parenthesized_range; +use ruff_python_ast::{CmpOp, Expr, ExprCall, ExprCompare, ExprStringLiteral}; +use ruff_text_size::TextRange; /// ## What it does /// Checks for uses of hardcoded charsets, which are defined in Python string module. @@ -156,8 +157,10 @@ fn push_diagnostic(checker: &mut Checker, range: TextRange, charset: &NamedChars /// FURB156 pub(crate) fn hardcoded_string_charset_comparison(checker: &mut Checker, compare: &ExprCompare) { - let ([CmpOp::In | CmpOp::NotIn], [Expr::StringLiteral(ExprStringLiteral { value, range })]) = - (compare.ops.as_ref(), compare.comparators.as_ref()) + let ( + [CmpOp::In | CmpOp::NotIn], + [Expr::StringLiteral(string_literal @ ExprStringLiteral { value, .. })], + ) = (compare.ops.as_ref(), compare.comparators.as_ref()) else { return; }; @@ -173,12 +176,37 @@ pub(crate) fn hardcoded_string_charset_comparison(checker: &mut Checker, compare return; } - push_diagnostic(checker, *range, charset); + let range = parenthesized_range( + string_literal.into(), + compare.into(), + checker.comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(string_literal.range); + + push_diagnostic(checker, range, charset); } /// FURB156 pub(crate) fn hardcoded_string_charset_literal(checker: &mut Checker, expr: &ExprStringLiteral) { if let Some(charset) = check_charset_exact(expr.value.to_str().as_bytes()) { - push_diagnostic(checker, expr.range(), charset); + let range = parenthesized_range( + expr.into(), + checker.semantic().current_expression_parent().map_or_else( + || checker.semantic().current_statement().into(), + |parent| { + if let Expr::Call(ExprCall { arguments, .. }) = parent { + arguments.into() + } else { + parent.into() + } + }, + ), + checker.comment_ranges(), + checker.locator().contents(), + ) + .unwrap_or(expr.range); + + push_diagnostic(checker, range, charset); } } diff --git a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap index 97ae575aadee1..028362aca5bc5 100644 --- a/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap +++ b/crates/ruff_linter/src/rules/refurb/snapshots/ruff_linter__rules__refurb__tests__FURB156_FURB156.py.snap @@ -219,7 +219,7 @@ FURB156.py:12:11: FURB156 [*] Use of hardcoded string charset 13 |+_ = "" in string.digits 13 14 | _ = "" in "12345670" 14 15 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' -15 16 | +15 16 | _ = ( FURB156.py:13:11: FURB156 [*] Use of hardcoded string charset | @@ -227,6 +227,7 @@ FURB156.py:13:11: FURB156 [*] Use of hardcoded string charset 13 | _ = "" in "12345670" | ^^^^^^^^^^ FURB156 14 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' +15 | _ = ( | = help: Replace hardcoded charset with `string.octdigits` @@ -243,8 +244,8 @@ FURB156.py:13:11: FURB156 [*] Use of hardcoded string charset 13 |-_ = "" in "12345670" 14 |+_ = "" in string.octdigits 14 15 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' -15 16 | -16 17 | # Ok +15 16 | _ = ( +16 17 | '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' FURB156.py:14:5: FURB156 [*] Use of hardcoded string charset | @@ -252,8 +253,8 @@ FURB156.py:14:5: FURB156 [*] Use of hardcoded string charset 13 | _ = "" in "12345670" 14 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ FURB156 -15 | -16 | # Ok +15 | _ = ( +16 | '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' | = help: Replace hardcoded charset with `string.printable` @@ -269,6 +270,106 @@ FURB156.py:14:5: FURB156 [*] Use of hardcoded string charset 13 14 | _ = "" in "12345670" 14 |-_ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' 15 |+_ = string.printable -15 16 | -16 17 | # Ok -17 18 | +15 16 | _ = ( +16 17 | '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' +17 18 | "'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" + +FURB156.py:15:5: FURB156 [*] Use of hardcoded string charset + | +13 | _ = "" in "12345670" +14 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' +15 | _ = ( + | _____^ +16 | | '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' +17 | | "'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" +18 | | ) + | |_^ FURB156 +19 | _ = id("0123" +20 | "4567" + | + = help: Replace hardcoded charset with `string.printable` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +12 13 | _ = "" in "1234567890" +13 14 | _ = "" in "12345670" +14 15 | _ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c' +15 |-_ = ( +16 |- '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' +17 |- "'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" +18 |-) + 16 |+_ = string.printable +19 17 | _ = id("0123" +20 18 | "4567" +21 19 | "89") + +FURB156.py:19:8: FURB156 [*] Use of hardcoded string charset + | +17 | "'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" +18 | ) +19 | _ = id("0123" + | ________^ +20 | | "4567" +21 | | "89") + | |___________^ FURB156 +22 | _ = "" in ("123" +23 | "456" + | + = help: Replace hardcoded charset with `string.digits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +16 17 | '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&' +17 18 | "'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c" +18 19 | ) +19 |-_ = id("0123" +20 |- "4567" +21 |- "89") + 20 |+_ = id(string.digits) +22 21 | _ = "" in ("123" +23 22 | "456" +24 23 | "789" + +FURB156.py:22:11: FURB156 [*] Use of hardcoded string charset + | +20 | "4567" +21 | "89") +22 | _ = "" in ("123" + | ___________^ +23 | | "456" +24 | | "789" +25 | | "0") + | |_______________^ FURB156 +26 | +27 | # Ok + | + = help: Replace hardcoded charset with `string.digits` + +ℹ Safe fix +1 1 | # Errors + 2 |+import string +2 3 | +3 4 | _ = "0123456789" +4 5 | _ = "01234567" +-------------------------------------------------------------------------------- +19 20 | _ = id("0123" +20 21 | "4567" +21 22 | "89") +22 |-_ = "" in ("123" +23 |- "456" +24 |- "789" +25 |- "0") + 23 |+_ = "" in string.digits +26 24 | +27 25 | # Ok +28 26 | From aa0a81a05f70c39254444c0c421e729791960519 Mon Sep 17 00:00:00 2001 From: Aleksei Latyshev Date: Mon, 30 Sep 2024 14:21:29 +0200 Subject: [PATCH 3/3] [`refurb`] FURB156: simplify AsciiCharSet interface --- .../refurb/rules/hardcoded_string_charset.rs | 33 +++++-------------- 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs b/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs index 219dcd818be19..fba4939f81f42 100644 --- a/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs +++ b/crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs @@ -58,20 +58,9 @@ struct AsciiCharSet(u128); impl AsciiCharSet { /// Creates the set of ascii characters from `bytes`. /// Returns None if there is non-ascii byte. - fn from_bytes(bytes: &[u8]) -> Option { - bytes - .iter() - .try_fold(0, |acc, &byte| byte.is_ascii().then(|| acc | (1 << byte))) - .map(Self) - } - - /// Creates the set of ascii characters from `bytes`. - /// Returns None if there is non-ascii byte. - const fn from_bytes_const(bytes: &[u8]) -> Option { - // TODO: remove in favor of [`Self::from_bytes`], when its implementation will be - // supported in `const` fn - // - https://github.com/rust-lang/rust/issues/67792 - // - https://github.com/rust-lang/rust/issues/87575 + const fn from_bytes(bytes: &[u8]) -> Option { + // TODO: simplify implementation, when const-traits are supported + // https://github.com/rust-lang/rust-project-goals/issues/106 let mut bitset = 0; let mut i = 0; while i < bytes.len() { @@ -83,15 +72,6 @@ impl AsciiCharSet { } Some(Self(bitset)) } - - const fn from_bytes_const_unwrap(bytes: &[u8]) -> Self { - // TODO: replace with .unwrap() in the caller, when Option::unwrap will be stable in `const fn` - // - https://github.com/rust-lang/rust/issues/67441) - match Self::from_bytes_const(bytes) { - Some(res) => res, - None => unreachable!(), - } - } } impl NamedCharset { @@ -100,7 +80,12 @@ impl NamedCharset { name, bytes, // SAFETY: The named charset is guaranteed to have only ascii bytes. - ascii_char_set: AsciiCharSet::from_bytes_const_unwrap(bytes), + // TODO: replace with `.unwrap()`, when `Option::unwrap` will be stable in `const fn` + // https://github.com/rust-lang/rust/issues/67441 + ascii_char_set: match AsciiCharSet::from_bytes(bytes) { + Some(ascii_char_set) => ascii_char_set, + None => unreachable!(), + }, } } }