[<prev] [next>] [day] [month] [year] [list]
Message-ID: <2804E0A754F9E415+20250708093458.1230294-1-wangyuli@uniontech.com>
Date: Tue, 8 Jul 2025 17:34:58 +0800
From: WangYuli <wangyuli@...ontech.com>
To: apw@...onical.com,
joe@...ches.com,
dwaipayanray1@...il.com,
lukas.bulwahn@...il.com
Cc: linux-kernel@...r.kernel.org,
zhanjun@...ontech.com,
niecheng1@...ontech.com,
guanwentao@...ontech.com,
WangYuli <wangyuli@...ontech.com>
Subject: [PATCH] checkpatch: Add full-width character detection
Add comprehensive detection and automatic fixing capability for full-width
(Unicode) characters that are commonly mistaken for ASCII punctuation marks.
This helps catch input method editor artifacts that can cause compilation
errors or formatting issues.
The implementation detects 25 types of full-width characters:
- Basic punctuation: ;,。()!?:
- Programming brackets: []{}<>
- Assignment and comparison: =
- Arithmetic operators: +-*/\
- Other programming symbols: %#&|
Detection covers three areas:
1. Code lines (lines starting with '+') - FULLWIDTH_CHARS
2. Commit messages - FULLWIDTH_CHARS_COMMIT
3. Subject lines - FULLWIDTH_CHARS_SUBJECT
Example usage:
./scripts/checkpatch.pl my_patch.patch
./scripts/checkpatch.pl --fix my_patch.patch
./scripts/checkpatch.pl --fix-inplace -f my_source.c
Signed-off-by: WangYuli <wangyuli@...ontech.com>
---
scripts/checkpatch.pl | 84 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 84 insertions(+)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 664f7b7a622c..bd691dc848a2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -75,6 +75,41 @@ my $git_command ='export LANGUAGE=en_US.UTF-8; git';
my $tabsize = 8;
my ${CONFIG_} = "CONFIG_";
+# Full-width characters keyed by UTF-8 byte sequence: [ASCII replacement, name, display bytes]
+my %fullwidth_chars = (
+	# Basic punctuation (display form repeats the key bytes so it survives text re-encoding)
+	"\xef\xbc\x9b" => [";", "semicolon", "\xef\xbc\x9b"],
+	"\xef\xbc\x8c" => [",", "comma", "\xef\xbc\x8c"],
+	"\xe3\x80\x82" => [".", "period", "\xe3\x80\x82"],
+	"\xef\xbc\x88" => ["(", "opening parenthesis", "\xef\xbc\x88"],
+	"\xef\xbc\x89" => [")", "closing parenthesis", "\xef\xbc\x89"],
+	"\xef\xbc\x81" => ["!", "exclamation mark", "\xef\xbc\x81"],
+	"\xef\xbc\x9f" => ["?", "question mark", "\xef\xbc\x9f"],
+	"\xef\xbc\x9a" => [":", "colon", "\xef\xbc\x9a"],
+	"\xe3\x80\x80" => [" ", "space", "\xe3\x80\x80"],
+	# Programming brackets
+	"\xef\xbc\xbb" => ["[", "left square bracket", "\xef\xbc\xbb"],
+	"\xef\xbc\xbd" => ["]", "right square bracket", "\xef\xbc\xbd"],
+	"\xef\xbd\x9b" => ["{", "left curly bracket", "\xef\xbd\x9b"],
+	"\xef\xbd\x9d" => ["}", "right curly bracket", "\xef\xbd\x9d"],
+	"\xef\xbc\x9c" => ["<", "less-than sign", "\xef\xbc\x9c"],
+	"\xef\xbc\x9e" => [">", "greater-than sign", "\xef\xbc\x9e"],
+	# Assignment and comparison
+	"\xef\xbc\x9d" => ["=", "equals sign", "\xef\xbc\x9d"],
+	# Arithmetic operators
+	"\xef\xbc\x8b" => ["+", "plus sign", "\xef\xbc\x8b"],
+	"\xef\xbc\x8d" => ["-", "minus sign", "\xef\xbc\x8d"],
+	"\xef\xbc\x8a" => ["*", "asterisk", "\xef\xbc\x8a"],
+	"\xef\xbc\x8f" => ["/", "solidus", "\xef\xbc\x8f"],
+	"\xef\xbc\xbc" => ["\\", "reverse solidus", "\xef\xbc\xbc"],
+	# Other programming symbols
+	"\xef\xbc\x85" => ["%", "percent sign", "\xef\xbc\x85"],
+	"\xef\xbc\x83" => ["#", "number sign", "\xef\xbc\x83"],
+	"\xef\xbc\x86" => ["&", "ampersand", "\xef\xbc\x86"],
+	"\xef\xbd\x9c" => ["|", "vertical line", "\xef\xbd\x9c"],
+);
+my $fullwidth_pattern = join('|', map { quotemeta($_) } sort keys %fullwidth_chars);	# alternation of every key
+
my %maybe_linker_symbol; # for externs in c exceptions, when seen in *vmlinux.lds.h
sub help {
@@ -1018,6 +1053,40 @@ sub read_words {
return 0;
}
+# Report full-width characters found in $line and optionally fix them.
+#   $line         - text matched against $fullwidth_pattern
+#   $context      - text inserted directly after "found" in each message
+#   $warning_type - first argument passed to WARN()
+#   $apply_fix    - when true (and $fixed_ref is defined) the stored line
+#                   $fixed_ref->[$fixlinenr] has its full-width characters
+#                   replaced by their ASCII equivalents
+#   $herecurr     - text appended, after a newline, to each message
+# Returns the number of full-width characters found in $line (0 if none).
+sub check_fullwidth_chars {
+	my ($line, $context, $warning_type, $apply_fix, $fixlinenr, $fixed_ref, $herecurr) = @_;
+
+	# A single capture group with /g in list context yields every match.
+	my @found = $line =~ /($fullwidth_pattern)/g;
+	return 0 unless @found;
+
+	# One warning per distinct character, in order of first appearance;
+	# repeats on the same line would only duplicate the message.
+	my %seen;
+	foreach my $byte_seq (grep { !$seen{$_}++ } @found) {
+		my ($ascii_char, $name, $fullwidth_char) = @{$fullwidth_chars{$byte_seq}};
+		WARN($warning_type,
+		     "Full-width $name ($fullwidth_char) found$context, use ASCII $name ($ascii_char) instead" . "\n" . $herecurr);
+	}
+
+	# Substitute in the stored line instead of overwriting it with a fixed
+	# copy of $line, so edits already recorded for this line are kept.
+	if ($apply_fix && defined $fixed_ref && defined $fixed_ref->[$fixlinenr]) {
+		$fixed_ref->[$fixlinenr] =~ s/($fullwidth_pattern)/$fullwidth_chars{$1}[0]/ge;
+	}
+
+	return scalar @found;
+}
+
my $const_structs;
if (show_type("CONST_STRUCT")) {
read_words(\$const_structs, $conststructsfile)
@@ -2960,6 +3029,11 @@ sub process {
$commit_log_has_diff = 1;
}
+# Check for full-width characters in commit message
+ if ($in_commit_log && show_type("FULLWIDTH_CHARS_COMMIT")) {
+ check_fullwidth_chars($rawline, " in commit message", "FULLWIDTH_CHARS_COMMIT", 0, 0, undef, $herecurr);
+ }
+
# Check for incorrect file permissions
if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
my $permhere = $here . "FILE: $realfile\n";
@@ -3265,6 +3339,11 @@ sub process {
"A patch subject line should describe the change not the tool that found it\n" . $herecurr);
}
+# Check for full-width characters in Subject line
+ if ($in_header_lines && $line =~ /^Subject:/i && show_type("FULLWIDTH_CHARS_SUBJECT")) {
+ check_fullwidth_chars($rawline, " in subject line", "FULLWIDTH_CHARS_SUBJECT", 0, 0, undef, $herecurr);
+ }
+
# Check for Gerrit Change-Ids not in any patch context
if ($realfile eq '' && !$has_patch_separator && $line =~ /^\s*change-id:/i) {
if (ERROR("GERRIT_CHANGE_ID",
@@ -3960,6 +4039,11 @@ sub process {
}
}
+# check for full-width characters (full-width punctuation marks, etc.)
+ if ($rawline =~ /^\+/ && show_type("FULLWIDTH_CHARS")) {
+ check_fullwidth_chars($rawline, "", "FULLWIDTH_CHARS", $fix, $fixlinenr, \@fixed, $herecurr);
+ }
+
# check multi-line statement indentation matches previous line
if ($perl_version_ok &&
$prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
--
2.50.0
Powered by blists - more mailing lists