Skip to content

Commit

Permalink
[refurb] implement hardcoded-string-charset (FURB156)
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-700 committed Sep 26, 2024
1 parent 58a8e9c commit 40149a1
Show file tree
Hide file tree
Showing 9 changed files with 474 additions and 1 deletion.
20 changes: 20 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/refurb/FURB156.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Errors

_ = "0123456789"
_ = "01234567"
_ = "0123456789abcdefABCDEF"
_ = "abcdefghijklmnopqrstuvwxyz"
_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
_ = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
_ = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
_ = " \t\n\r\v\f"

_ = "" in "1234567890"
_ = "" in "12345670"
_ = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

# Ok

_ = "1234567890"
_ = "1234"
_ = "" in "1234"
3 changes: 3 additions & 0 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,6 +1355,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::SingleItemMembershipTest) {
refurb::rules::single_item_membership_test(checker, expr, left, ops, comparators);
}
if checker.enabled(Rule::HardcodedStringCharset) {
refurb::rules::hardcoded_string_charset_comparison(checker, ops, comparators);
}
}
Expr::NumberLiteral(number_literal @ ast::ExprNumberLiteral { .. }) => {
if checker.source_type.is_stub() && checker.enabled(Rule::NumericLiteralTooLong) {
Expand Down
5 changes: 4 additions & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/string_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use ruff_python_ast::StringLike;

use crate::checkers::ast::Checker;
use crate::codes::Rule;
use crate::rules::{flake8_bandit, flake8_pyi, flake8_quotes, pycodestyle, ruff};
use crate::rules::{flake8_bandit, flake8_pyi, flake8_quotes, pycodestyle, refurb, ruff};

/// Run lint rules over a [`StringLike`] syntax nodes.
pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
Expand Down Expand Up @@ -39,4 +39,7 @@ pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
if checker.enabled(Rule::InvalidEscapeSequence) {
pycodestyle::rules::invalid_escape_sequence(checker, string_like);
}
if checker.enabled(Rule::HardcodedStringCharset) {
refurb::rules::hardcoded_string_charset_literal(checker, string_like);
}
}
1 change: 1 addition & 0 deletions crates/ruff_linter/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
(Refurb, "148") => (RuleGroup::Preview, rules::refurb::rules::UnnecessaryEnumerate),
(Refurb, "152") => (RuleGroup::Preview, rules::refurb::rules::MathConstant),
(Refurb, "154") => (RuleGroup::Preview, rules::refurb::rules::RepeatedGlobal),
(Refurb, "156") => (RuleGroup::Preview, rules::refurb::rules::HardcodedStringCharset),
(Refurb, "157") => (RuleGroup::Preview, rules::refurb::rules::VerboseDecimalConstructor),
(Refurb, "161") => (RuleGroup::Stable, rules::refurb::rules::BitCount),
(Refurb, "163") => (RuleGroup::Stable, rules::refurb::rules::RedundantLogBase),
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/rules/refurb/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mod tests {
#[test_case(Rule::UnnecessaryEnumerate, Path::new("FURB148.py"))]
#[test_case(Rule::MathConstant, Path::new("FURB152.py"))]
#[test_case(Rule::RepeatedGlobal, Path::new("FURB154.py"))]
#[test_case(Rule::HardcodedStringCharset, Path::new("FURB156.py"))]
#[test_case(Rule::VerboseDecimalConstructor, Path::new("FURB157.py"))]
#[test_case(Rule::UnnecessaryFromFloat, Path::new("FURB164.py"))]
#[test_case(Rule::PrintEmptyString, Path::new("FURB105.py"))]
Expand Down
168 changes: 168 additions & 0 deletions crates/ruff_linter/src/rules/refurb/rules/hardcoded_string_charset.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
use crate::checkers::ast::Checker;
use crate::importer::ImportRequest;
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{CmpOp, Expr, ExprStringLiteral, StringLike};
use ruff_text_size::{Ranged, TextRange};

/// ## What it does
/// Checks for uses of hardcoded charsets, which are defined in Python string module.
///
/// ## Why is this bad?
/// Usage of named charsets from the standard library is more readable and less error-prone.
///
/// ## Example
/// ```python
/// x = "0123456789"
/// y in "abcdefghijklmnopqrstuvwxyz"
/// ```
///
/// Use instead
/// ```python
/// import string
///
/// x = string.digits
/// y in string.ascii_lowercase
/// ```
///
/// ## References
/// - [Python documentation: String constants](https://docs.python.org/3/library/string.html#string-constants)
#[violation]
pub struct HardcodedStringCharset {
name: &'static str,
}

impl AlwaysFixableViolation for HardcodedStringCharset {
#[derive_message_formats]
fn message(&self) -> String {
format!("Use of hardcoded string charset")
}

fn fix_title(&self) -> String {
let HardcodedStringCharset { name } = self;
format!("Replace hardcoded charset with `string.{name}`")
}
}

struct Charset {
name: &'static str,
bytes: &'static [u8],
bitset: u128,
}

impl Charset {
const fn new(name: &'static str, value: &'static str) -> Self {
let bytes = value.as_bytes();

// let bitset = bytes.iter().fold(0, |acc, &byte| acc | (1 << byte));
// TODO: replace with ^ or for-loop, when these features will be supported in `const` fn
// - https://github.com/rust-lang/rust/issues/67792
// - https://github.com/rust-lang/rust/issues/87575
let mut bitset = 0;
let mut i = 0;
while i < bytes.len() {
bitset |= 1 << bytes[i];
i += 1;
}

Self {
name,
bytes,
bitset,
}
}
}

const KNOWN_CHARSETS: [Charset; 9] = [
Charset::new(
"ascii_letters",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
),
Charset::new("ascii_lowercase", "abcdefghijklmnopqrstuvwxyz"),
Charset::new("ascii_uppercase", "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
Charset::new("digits", "0123456789"),
Charset::new("hexdigits", "0123456789abcdefABCDEF"),
Charset::new("octdigits", "01234567"),
Charset::new("punctuation", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"),
Charset::new(
"printable",
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"\
#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c",
),
Charset::new("whitespace", " \t\n\r\x0b\x0c"),
];

fn check_charset_as_set(s: &str) -> Option<&Charset> {
if !s.is_ascii() {
return None;
}
let bitset = s
.as_bytes()
.iter()
.try_fold(0, |acc, &byte| (byte < 128).then(|| acc | (1_u128 << byte)))?;

KNOWN_CHARSETS
.iter()
.find(|&charset| charset.bitset == bitset)
}

fn check_charset_exact(bytes: &[u8]) -> Option<&Charset> {
KNOWN_CHARSETS
.iter()
.find(|&charset| charset.bytes == bytes)
}

fn push_diagnostic(checker: &mut Checker, range: TextRange, charset: &Charset) {
let name = charset.name;
let mut diagnostic = Diagnostic::new(HardcodedStringCharset { name }, range);
diagnostic.try_set_fix(|| {
let (edit, binding) = checker.importer().get_or_import_symbol(
&ImportRequest::import("string", name),
range.start(),
checker.semantic(),
)?;
Ok(Fix::safe_edits(
Edit::range_replacement(binding, range),
[edit],
))
});
checker.diagnostics.push(diagnostic);
}

/// FURB156
pub(crate) fn hardcoded_string_charset_comparison(
checker: &mut Checker,
ops: &[CmpOp],
comparators: &[Expr],
) {
let ([op], [comp]) = (ops, comparators) else {
return;
};
if !matches!(op, CmpOp::In | CmpOp::NotIn) {
return;
}
let Expr::StringLiteral(ExprStringLiteral { value, .. }) = comp else {
return;
};
let Some(charset) = check_charset_as_set(value.to_str()) else {
return;
};

// In this case the diagnostic will be emitted via string_like check.
if check_charset_exact(value.to_str().as_bytes()).is_some() {
return;
}

push_diagnostic(checker, comp.range(), charset);
}

/// FURB156
pub(crate) fn hardcoded_string_charset_literal(checker: &mut Checker, string_like: StringLike) {
let StringLike::String(expr) = string_like else {
return;
};
let Some(charset) = check_charset_exact(expr.value.to_str().as_bytes()) else {
return;
};
push_diagnostic(checker, string_like.range(), charset);
}
2 changes: 2 additions & 0 deletions crates/ruff_linter/src/rules/refurb/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub(crate) use check_and_remove_from_set::*;
pub(crate) use delete_full_slice::*;
pub(crate) use for_loop_set_mutations::*;
pub(crate) use fstring_number_format::*;
pub(crate) use hardcoded_string_charset::*;
pub(crate) use hashlib_digest_hex::*;
pub(crate) use if_exp_instead_of_or_operator::*;
pub(crate) use if_expr_min_max::*;
Expand Down Expand Up @@ -36,6 +37,7 @@ mod check_and_remove_from_set;
mod delete_full_slice;
mod for_loop_set_mutations;
mod fstring_number_format;
mod hardcoded_string_charset;
mod hashlib_digest_hex;
mod if_exp_instead_of_or_operator;
mod if_expr_min_max;
Expand Down
Loading

0 comments on commit 40149a1

Please sign in to comment.