[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20250815075726.135806-1-zhangdandan@uniontech.com>
Date: Fri, 15 Aug 2025 15:57:26 +0800
From: Morduang Zang <zhangdandan@...ontech.com>
To: apw@...onical.com,
joe@...ches.com,
dwaipayanray1@...il.com,
lukas.bulwahn@...il.com
Cc: linux-kernel@...r.kernel.org,
wangyuli@...ontech.com,
zhanjun@...ontech.com,
niecheng1@...ontech.com,
Morduang Zang <zhangdandan@...ontech.com>
Subject: [PATCH RESEND] checkpatch: Add full-width character detection

Add comprehensive detection and automatic fixing capability for full-width (Unicode) characters that are commonly mistaken for ASCII punctuation marks. This helps catch input method editor artifacts that can cause compilation errors or formatting issues.
The implementation detects 25 types of full-width characters:
- Basic punctuation: ;,。()!?:
- Programming brackets: []{}<>
- Assignment and comparison: =
- Arithmetic operators: +-*/\
- Other programming symbols: %#&|
Detection covers three areas:
1. Code lines (lines starting with '+') - FULLWIDTH_CHARS
2. Commit messages - FULLWIDTH_CHARS_COMMIT
3. Subject lines - FULLWIDTH_CHARS_SUBJECT
Example usage:
./scripts/checkpatch.pl my_patch.patch
./scripts/checkpatch.pl --fix my_patch.patch
./scripts/checkpatch.pl --fix-inplace my_source.c
Signed-off-by: Morduang Zang <zhangdandan@...ontech.com>
Signed-off-by: Wangyuli <wangyuli@...ontech.com>
---
scripts/checkpatch.pl | 84 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 84 insertions(+)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e722dd6fa8ef..f4cb547a470b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -75,6 +75,41 @@ my $git_command ='export LANGUAGE=en_US.UTF-8; git';
my $tabsize = 8;
my ${CONFIG_} = "CONFIG_";
+# Full-width character map: UTF-8 byte sequence => [ASCII replacement, name, full-width glyph shown in messages]
+my %fullwidth_chars = (
+	# Basic punctuation
+	"\xef\xbc\x9b" => [";", "semicolon", ";"],
+	"\xef\xbc\x8c" => [",", "comma", ","],
+	"\xe3\x80\x82" => [".", "period", "。"],
+	"\xef\xbc\x88" => ["(", "opening parenthesis", "("],
+	"\xef\xbc\x89" => [")", "closing parenthesis", ")"],
+	"\xef\xbc\x81" => ["!", "exclamation mark", "!"],
+	"\xef\xbc\x9f" => ["?", "question mark", "?"],
+	"\xef\xbc\x9a" => [":", "colon", ":"],
+	"\xe3\x80\x80" => [" ", "space", "　"],
+	# Programming brackets
+	"\xef\xbc\xbb" => ["[", "left square bracket", "["],
+	"\xef\xbc\xbd" => ["]", "right square bracket", "]"],
+	"\xef\xbd\x9b" => ["{", "left curly bracket", "{"],
+	"\xef\xbd\x9d" => ["}", "right curly bracket", "}"],
+	"\xef\xbc\x9c" => ["<", "less-than sign", "<"],
+	"\xef\xbc\x9e" => [">", "greater-than sign", ">"],
+	# Assignment and comparison
+	"\xef\xbc\x9d" => ["=", "equals sign", "="],
+	# Arithmetic operators
+	"\xef\xbc\x8b" => ["+", "plus sign", "+"],
+	"\xef\xbc\x8d" => ["-", "minus sign", "-"],
+	"\xef\xbc\x8a" => ["*", "asterisk", "*"],
+	"\xef\xbc\x8f" => ["/", "solidus", "/"],
+	"\xef\xbc\xbc" => ["\\", "reverse solidus", "\"],
+	# Other programming symbols
+	"\xef\xbc\x85" => ["%", "percent sign", "%"],
+	"\xef\xbc\x83" => ["#", "number sign", "#"],
+	"\xef\xbc\x86" => ["&", "ampersand", "&"],
+	"\xef\xbd\x9c" => ["|", "vertical line", "|"],
+);
+my $fullwidth_pattern = join('|', map { quotemeta($_) } keys %fullwidth_chars);
+
my %maybe_linker_symbol; # for externs in c exceptions, when seen in *vmlinux.lds.h
sub help {
@@ -1019,6 +1054,40 @@ sub read_words {
return 0;
}
+# Warn about full-width (input-method) punctuation in $line and, when asked,
+# rewrite it to ASCII in the fixed-lines array.
+#
+# $line          raw line to scan; matched as UTF-8 byte sequences
+# $context       text placed after "found" in the message ("" for code)
+# $warning_type  message type handed to WARN()
+# $apply_fix     true to rewrite $fixed_ref->[$fixlinenr]
+# $fixlinenr     index of this line in @$fixed_ref
+# $fixed_ref     reference to the array of fixed lines, or undef
+# $herecurr      location text appended to every warning
+#
+# Returns the number of full-width characters found (0 if none).
+sub check_fullwidth_chars {
+	my ($line, $context, $warning_type, $apply_fix, $fixlinenr, $fixed_ref, $herecurr) = @_;
+	my ($found, $reported) = (0, 0);
+
+	# One warning per occurrence, in line order.  Every match is a key of
+	# %fullwidth_chars because the pattern is built from those keys.
+	while ($line =~ /($fullwidth_pattern)/g) {
+		my ($ascii_char, $name, $fullwidth_char) = @{$fullwidth_chars{$1}};
+		$found++;
+		$reported = 1 if WARN($warning_type, "Full-width $name ($fullwidth_char) found$context, use ASCII $name ($ascii_char) instead" . "\n" . $herecurr);
+	}
+
+	# Fix only what was actually reported, and edit the fixed line in place:
+	# assigning a rewritten copy of $line would discard any fix that another
+	# check may already have made to the same entry.
+	if ($reported && $apply_fix && defined $fixed_ref) {
+		$fixed_ref->[$fixlinenr] =~ s/($fullwidth_pattern)/$fullwidth_chars{$1}[0]/ge;
+	}
+
+	return $found;
+}
+
my $const_structs;
if (show_type("CONST_STRUCT")) {
read_words(\$const_structs, $conststructsfile)
@@ -2961,6 +3030,11 @@ sub process {
$commit_log_has_diff = 1;
}
+# Check for full-width characters in commit message
+ if ($in_commit_log && show_type("FULLWIDTH_CHARS_COMMIT")) {
+ check_fullwidth_chars($rawline, " in commit message", "FULLWIDTH_CHARS_COMMIT", 0, 0, undef, $herecurr);
+ }
+
# Check for incorrect file permissions
if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
my $permhere = $here . "FILE: $realfile\n";
@@ -3266,6 +3340,11 @@ sub process {
"A patch subject line should describe the change not the tool that found it\n" . $herecurr);
}
+# Check for full-width characters in Subject line
+ if ($in_header_lines && $line =~ /^Subject:/i && show_type("FULLWIDTH_CHARS_SUBJECT")) {
+ check_fullwidth_chars($rawline, " in subject line", "FULLWIDTH_CHARS_SUBJECT", 0, 0, undef, $herecurr);
+ }
+
# Check for Gerrit Change-Ids not in any patch context
if ($realfile eq '' && !$has_patch_separator && $line =~ /^\s*change-id:/i) {
if (ERROR("GERRIT_CHANGE_ID",
@@ -3974,6 +4053,11 @@ sub process {
}
}
+# check for full-width characters (full-width punctuation marks, etc.)
+ if ($rawline =~ /^\+/ && show_type("FULLWIDTH_CHARS")) {
+ check_fullwidth_chars($rawline, "", "FULLWIDTH_CHARS", $fix, $fixlinenr, \@fixed, $herecurr);
+ }
+
# check multi-line statement indentation matches previous line
if ($perl_version_ok &&
$prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
--
2.20.1
Powered by blists - more mailing lists