[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9f8228c20b8849c068e107bcbcb1f24e1cbd36bb.1769673038.git.mchehab+huawei@kernel.org>
Date: Thu, 29 Jan 2026 09:08:19 +0100
From: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
To: Jonathan Corbet <corbet@....net>,
Linux Doc Mailing List <linux-doc@...r.kernel.org>
Cc: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>,
bpf@...r.kernel.org,
intel-wired-lan@...ts.osuosl.org,
linux-hardening@...r.kernel.org,
linux-kernel@...r.kernel.org,
netdev@...r.kernel.org,
"Gustavo A. R. Silva" <gustavoars@...nel.org>,
Kees Cook <kees@...nel.org>,
Mauro Carvalho Chehab <mchehab@...nel.org>,
Randy Dunlap <rdunlap@...radead.org>,
Shuah Khan <skhan@...uxfoundation.org>
Subject: [PATCH v3 28/30] docs: kdoc_parser: move transform lists to a separate file
Over the time, most of the changes at kernel-doc are related
to maintaining a list of transforms to convert macros into pure
C code.
Place such transforms on a separate module, to cleanup the
parser module.
While here, drop the now obsolete comment about the two-steps
logic to handle struct_group macros.
There is an advantage on that: QEMU also uses our own kernel-doc,
but the xforms list there is different. By placing it on a
separate module, we can minimize the differences and make it
easier to keep QEMU in sync with Kernel upstream.
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
---
Documentation/tools/kdoc_parser.rst | 8 ++
tools/lib/python/kdoc/kdoc_files.py | 3 +-
tools/lib/python/kdoc/kdoc_parser.py | 147 ++------------------------
tools/lib/python/kdoc/xforms_lists.py | 117 ++++++++++++++++++++
4 files changed, 133 insertions(+), 142 deletions(-)
create mode 100644 tools/lib/python/kdoc/xforms_lists.py
diff --git a/Documentation/tools/kdoc_parser.rst b/Documentation/tools/kdoc_parser.rst
index 03ee54a1b1cc..55b202173195 100644
--- a/Documentation/tools/kdoc_parser.rst
+++ b/Documentation/tools/kdoc_parser.rst
@@ -4,6 +4,14 @@
Kernel-doc parser stage
=======================
+C replacement rules used by the parser
+======================================
+
+.. automodule:: lib.python.kdoc.xforms_lists
+ :members:
+ :show-inheritance:
+ :undoc-members:
+
File handler classes
====================
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
index 022487ea2cc6..7357c97a4b01 100644
--- a/tools/lib/python/kdoc/kdoc_files.py
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -15,6 +15,7 @@ import os
import re
from kdoc.kdoc_parser import KernelDoc
+from kdoc.xforms_lists import CTransforms
from kdoc.kdoc_output import OutputFormat
@@ -117,7 +118,7 @@ class KernelFiles():
if fname in self.files:
return
- doc = KernelDoc(self.config, fname)
+ doc = KernelDoc(self.config, fname, CTransforms)
export_table, entries = doc.parse_kdoc()
self.export_table[fname] = export_table
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
index 1e8e156e2a9e..a280fe581937 100644
--- a/tools/lib/python/kdoc/kdoc_parser.py
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -75,142 +75,6 @@ doc_begin_func = KernRe(str(doc_com) + # initial " * '
#
struct_args_pattern = r'([^,)]+)'
-struct_xforms = [
- # Strip attributes
- (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
- (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
- (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '),
- (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '),
- (KernRe(r'\s*__packed\s*', re.S), ' '),
- (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
- (KernRe(r'\s*__private', re.S), ' '),
- (KernRe(r'\s*__rcu', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
- (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
- (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
- #
- # Unwrap struct_group macros based on this definition:
- # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
- # which has variants like: struct_group(NAME, MEMBERS...)
- # Only MEMBERS arguments require documentation.
- #
- # Parsing them happens on two steps:
- #
- # 1. drop struct group arguments that aren't at MEMBERS,
- # storing them as STRUCT_GROUP(MEMBERS)
- #
- # 2. remove STRUCT_GROUP() ancillary macro.
- #
- # The original logic used to remove STRUCT_GROUP() using an
- # advanced regex:
- #
- # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
- #
- # with two patterns that are incompatible with
- # Python re module, as it has:
- #
- # - a recursive pattern: (?1)
- # - an atomic grouping: (?>...)
- #
- # I tried a simpler version: but it didn't work either:
- # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
- #
- # As it doesn't properly match the end parenthesis on some cases.
- #
- # So, a better solution was crafted: there's now a CFunction
- # class that ensures that delimiters after a search are properly
- # matched. So, the implementation to drop STRUCT_GROUP() will be
- # handled in separate.
- #
- (CFunction('struct_group'), r'\2'),
- (CFunction('struct_group_attr'), r'\3'),
- (CFunction('struct_group_tagged'), r'struct \1 \2; \3'),
- (CFunction('__struct_group'), r'\4'),
-
- #
- # Replace macros
- #
- # TODO: use CFunction on all FOO($1, $2, ...) matches
- #
- # it is better to also move those to the CFunction logic,
- # to ensure that parentheses will be properly matched.
- #
- (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
- r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
- (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
- r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
- (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
- re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
- (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
- re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
- (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
- r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
- struct_args_pattern + r'\)', re.S), r'\2 *\1'),
- (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
- struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
- (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
- (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
- (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
-]
-
-#
-# Transforms for function prototypes
-#
-function_xforms = [
- (KernRe(r"^static +"), ""),
- (KernRe(r"^extern +"), ""),
- (KernRe(r"^asmlinkage +"), ""),
- (KernRe(r"^inline +"), ""),
- (KernRe(r"^__inline__ +"), ""),
- (KernRe(r"^__inline +"), ""),
- (KernRe(r"^__always_inline +"), ""),
- (KernRe(r"^noinline +"), ""),
- (KernRe(r"^__FORTIFY_INLINE +"), ""),
- (KernRe(r"__init +"), ""),
- (KernRe(r"__init_or_module +"), ""),
- (KernRe(r"__deprecated +"), ""),
- (KernRe(r"__flatten +"), ""),
- (KernRe(r"__meminit +"), ""),
- (KernRe(r"__must_check +"), ""),
- (KernRe(r"__weak +"), ""),
- (KernRe(r"__sched +"), ""),
- (KernRe(r"_noprof"), ""),
- (KernRe(r"__always_unused *"), ""),
- (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
- (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
- (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
- (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
- (KernRe(r"__no_context_analysis\s*"), ""),
- (KernRe(r"__attribute_const__ +"), ""),
- (CFunction("__cond_acquires"), ""),
- (CFunction("__cond_releases"), ""),
- (CFunction("__acquires"), ""),
- (CFunction("__releases"), ""),
- (CFunction("__must_hold"), ""),
- (CFunction("__must_not_hold"), ""),
- (CFunction("__must_hold_shared"), ""),
- (CFunction("__cond_acquires_shared"), ""),
- (CFunction("__acquires_shared"), ""),
- (CFunction("__releases_shared"), ""),
- (CFunction("__attribute__"), ""),
-]
-
-#
-# Transforms for variable prototypes
-#
-var_xforms = [
- (KernRe(r"__read_mostly"), ""),
- (KernRe(r"__ro_after_init"), ""),
- (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""),
- (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""),
- (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
- (KernRe(r"(?://.*)$"), ""),
- (KernRe(r"(?:/\*.*\*/)"), ""),
- (KernRe(r";$"), ""),
-]
#
# Ancillary functions
@@ -394,11 +258,12 @@ class KernelDoc:
#: String to write when a parameter is not described.
undescribed = "-- undescribed --"
- def __init__(self, config, fname):
+ def __init__(self, config, fname, xforms):
"""Initialize internal variables"""
self.fname = fname
self.config = config
+ self.xforms = xforms
# Initial state for the state machines
self.state = state.NORMAL
@@ -889,7 +754,7 @@ class KernelDoc:
# Go through the list of members applying all of our transformations.
#
members = trim_private_members(members)
- members = self.apply_transforms(struct_xforms, members)
+ members = self.apply_transforms(self.xforms.struct_xforms, members)
#
# Deal with embedded struct and union members, and drop enums entirely.
@@ -1011,8 +876,7 @@ class KernelDoc:
# Drop comments and macros to have a pure C prototype
#
if not declaration_name:
- for r, sub in var_xforms:
- proto = r.sub(sub, proto)
+ proto = self.apply_transforms(self.xforms.var_xforms, proto)
proto = proto.rstrip()
@@ -1104,7 +968,8 @@ class KernelDoc:
#
# Apply the initial transformations.
#
- prototype = self.apply_transforms(function_xforms, prototype)
+ prototype = self.apply_transforms(self.xforms.function_xforms,
+ prototype)
# Yes, this truly is vile. We are looking for:
# 1. Return type (may be nothing if we're looking at a macro)
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
new file mode 100644
index 000000000000..88968bafdb78
--- /dev/null
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@...nel.org>.
+
+import re
+
+from kdoc.kdoc_re import CFunction, KernRe
+
+struct_args_pattern = r'([^,)]+)'
+
+class CTransforms:
+ """
+ Data class containing a long set of transformations to turn
+ structure member prefixes, and macro invocations and variables
+ into something we can parse and generate kdoc for.
+ """
+
+ #: Transforms for structs and unions
+ struct_xforms = [
+ # Strip attributes
+ (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
+ (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ' '),
+ (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ' '),
+ (KernRe(r'\s*__packed\s*', re.S), ' '),
+ (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+ (KernRe(r'\s*__private', re.S), ' '),
+ (KernRe(r'\s*__rcu', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+ (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
+
+ (CFunction('struct_group'), r'\2'),
+ (CFunction('struct_group_attr'), r'\3'),
+ (CFunction('struct_group_tagged'), r'struct \1 \2; \3'),
+ (CFunction('__struct_group'), r'\4'),
+
+ #
+ # Replace macros
+ #
+ # TODO: use CFunction on all FOO($1, $2, ...) matches
+ #
+ # it is better to also move those to the CFunction logic,
+ # to ensure that parentheses will be properly matched.
+ #
+ (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
+ r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+ (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
+ r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+ (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+ re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+ (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+ re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+ (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
+ r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
+ struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
+ struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
+ (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+ (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+ (KernRe(r'VIRTIO_DECLARE_FEATURES\(([\w_]+)\)'), r'union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }'),
+ ]
+
+ #: Transforms for function prototypes
+ function_xforms = [
+ (KernRe(r"^static +"), ""),
+ (KernRe(r"^extern +"), ""),
+ (KernRe(r"^asmlinkage +"), ""),
+ (KernRe(r"^inline +"), ""),
+ (KernRe(r"^__inline__ +"), ""),
+ (KernRe(r"^__inline +"), ""),
+ (KernRe(r"^__always_inline +"), ""),
+ (KernRe(r"^noinline +"), ""),
+ (KernRe(r"^__FORTIFY_INLINE +"), ""),
+ (KernRe(r"__init +"), ""),
+ (KernRe(r"__init_or_module +"), ""),
+ (KernRe(r"__deprecated +"), ""),
+ (KernRe(r"__flatten +"), ""),
+ (KernRe(r"__meminit +"), ""),
+ (KernRe(r"__must_check +"), ""),
+ (KernRe(r"__weak +"), ""),
+ (KernRe(r"__sched +"), ""),
+ (KernRe(r"_noprof"), ""),
+ (KernRe(r"__always_unused *"), ""),
+ (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
+ (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
+ (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
+ (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+ (KernRe(r"__no_context_analysis\s*"), ""),
+ (KernRe(r"__attribute_const__ +"), ""),
+ (CFunction("__cond_acquires"), ""),
+ (CFunction("__cond_releases"), ""),
+ (CFunction("__acquires"), ""),
+ (CFunction("__releases"), ""),
+ (CFunction("__must_hold"), ""),
+ (CFunction("__must_not_hold"), ""),
+ (CFunction("__must_hold_shared"), ""),
+ (CFunction("__cond_acquires_shared"), ""),
+ (CFunction("__acquires_shared"), ""),
+ (CFunction("__releases_shared"), ""),
+ (CFunction("__attribute__"), ""),
+ ]
+
+ #: Transforms for variables
+ var_xforms = [
+ (KernRe(r"__read_mostly"), ""),
+ (KernRe(r"__ro_after_init"), ""),
+ (KernRe(r'\s*__guarded_by\s*\([^\)]*\)', re.S), ""),
+ (KernRe(r'\s*__pt_guarded_by\s*\([^\)]*\)', re.S), ""),
+ (KernRe(r"LIST_HEAD\(([\w_]+)\)"), r"struct list_head \1"),
+ (KernRe(r"(?://.*)$"), ""),
+ (KernRe(r"(?:/\*.*\*/)"), ""),
+ (KernRe(r";$"), ""),
+ ]
--
2.52.0
Powered by blists - more mailing lists