lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20260119064731.23879-5-luis.augenstein@tngtech.com>
Date: Mon, 19 Jan 2026 07:47:21 +0100
From: Luis Augenstein <luis.augenstein@...tech.com>
To: nathan@...nel.org,
	nsc@...nel.org
Cc: linux-kbuild@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	akpm@...ux-foundation.org,
	gregkh@...uxfoundation.org,
	maximilian.huber@...tech.com,
	Luis Augenstein <luis.augenstein@...tech.com>
Subject: [PATCH 04/14] tools/sbom: add cmd graph generation

Implement command graph generation by parsing .cmd files to build a
dependency graph.
Add CmdGraph, CmdGraphNode, and .cmd file parsing.
Supports generating a flat list of used source files via the
--generate-used-files cli argument.

Co-developed-by: Maximilian Huber <maximilian.huber@...tech.com>
Signed-off-by: Maximilian Huber <maximilian.huber@...tech.com>
Signed-off-by: Luis Augenstein <luis.augenstein@...tech.com>
---
 tools/sbom/Makefile                         |   6 +-
 tools/sbom/sbom.py                          |  39 +++++
 tools/sbom/sbom/cmd_graph/__init__.py       |   7 +
 tools/sbom/sbom/cmd_graph/cmd_file.py       | 149 ++++++++++++++++++++
 tools/sbom/sbom/cmd_graph/cmd_graph.py      |  46 ++++++
 tools/sbom/sbom/cmd_graph/cmd_graph_node.py | 120 ++++++++++++++++
 tools/sbom/sbom/cmd_graph/deps_parser.py    |  52 +++++++
 tools/sbom/sbom/config.py                   | 147 ++++++++++++++++++-
 8 files changed, 563 insertions(+), 3 deletions(-)
 create mode 100644 tools/sbom/sbom/cmd_graph/__init__.py
 create mode 100644 tools/sbom/sbom/cmd_graph/cmd_file.py
 create mode 100644 tools/sbom/sbom/cmd_graph/cmd_graph.py
 create mode 100644 tools/sbom/sbom/cmd_graph/cmd_graph_node.py
 create mode 100644 tools/sbom/sbom/cmd_graph/deps_parser.py

diff --git a/tools/sbom/Makefile b/tools/sbom/Makefile
index 5b80b455c..052858556 100644
--- a/tools/sbom/Makefile
+++ b/tools/sbom/Makefile
@@ -25,7 +25,11 @@ $(SBOM_TARGETS) &:
 		sed 's/\.o$$/.ko/' $(objtree)/modules.order >> $(SBOM_ROOTS_FILE); \
 	fi
 
-	@python3 sbom.py
+	@python3 sbom.py \
+      --src-tree $(srctree) \
+      --obj-tree $(objtree) \
+      --roots-file $(SBOM_ROOTS_FILE) \
+      --output-directory $(objtree)
 
 	@rm $(SBOM_ROOTS_FILE)
 
diff --git a/tools/sbom/sbom.py b/tools/sbom/sbom.py
index c7f23d6eb..25d912a28 100644
--- a/tools/sbom/sbom.py
+++ b/tools/sbom/sbom.py
@@ -7,9 +7,13 @@ Compute software bill of materials in SPDX format describing a kernel build.
 """
 
 import logging
+import os
 import sys
+import time
 import sbom.sbom_logging as sbom_logging
 from sbom.config import get_config
+from sbom.path_utils import is_relative_to
+from sbom.cmd_graph import CmdGraph
 
 
 def main():
@@ -22,6 +26,36 @@ def main():
         format="[%(levelname)s] %(message)s",
     )
 
+    # Build cmd graph
+    logging.debug("Start building cmd graph")
+    start_time = time.time()
+    cmd_graph = CmdGraph.create(config.root_paths, config)
+    logging.debug(f"Built cmd graph in {time.time() - start_time} seconds")
+
+    # Save used files document
+    if config.generate_used_files:
+        if config.src_tree == config.obj_tree:
+            logging.info(
+                f"Extracting all files from the cmd graph to {(config.used_files_file_name,)} "
+                "instead of only source files because source files cannot be "
+                "reliably classified when the source and object trees are identical.",
+            )
+            used_files = [os.path.relpath(node.absolute_path, config.src_tree) for node in cmd_graph]
+            logging.debug(f"Found {len(used_files)} files in cmd graph.")
+        else:
+            used_files = [
+                os.path.relpath(node.absolute_path, config.src_tree)
+                for node in cmd_graph
+                if is_relative_to(node.absolute_path, config.src_tree)
+                and not is_relative_to(node.absolute_path, config.obj_tree)
+            ]
+            logging.debug(f"Found {len(used_files)} source files in cmd graph")
+        if not sbom_logging.has_errors() or config.write_output_on_error:
+            used_files_path = os.path.join(config.output_directory, config.used_files_file_name)
+            with open(used_files_path, "w", encoding="utf-8") as f:
+                f.write("\n".join(str(file_path) for file_path in used_files))
+            logging.debug(f"Successfully saved {used_files_path}")
+
     # Report collected warnings and errors in case of failure
     warning_summary = sbom_logging.summarize_warnings()
     error_summary = sbom_logging.summarize_errors()
@@ -30,6 +64,11 @@ def main():
         logging.warning(warning_summary)
     if error_summary:
         logging.error(error_summary)
+        if not config.write_output_on_error:
+            logging.info(
+                "Use --write-output-on-error to generate output documents even when errors occur. "
+                "Note that in this case the generated SPDX documents may be incomplete."
+            )
         sys.exit(1)
 
 
diff --git a/tools/sbom/sbom/cmd_graph/__init__.py b/tools/sbom/sbom/cmd_graph/__init__.py
new file mode 100644
index 000000000..9d661a5c3
--- /dev/null
+++ b/tools/sbom/sbom/cmd_graph/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from .cmd_graph import CmdGraph
+from .cmd_graph_node import CmdGraphNode, CmdGraphNodeConfig
+
+__all__ = ["CmdGraph", "CmdGraphNode", "CmdGraphNodeConfig"]
diff --git a/tools/sbom/sbom/cmd_graph/cmd_file.py b/tools/sbom/sbom/cmd_graph/cmd_file.py
new file mode 100644
index 000000000..d85ef5de0
--- /dev/null
+++ b/tools/sbom/sbom/cmd_graph/cmd_file.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import os
+import re
+from dataclasses import dataclass, field
+from sbom.cmd_graph.deps_parser import parse_cmd_file_deps
+from sbom.cmd_graph.savedcmd_parser import parse_inputs_from_commands
+import sbom.sbom_logging as sbom_logging
+from sbom.path_utils import PathStr
+
+SAVEDCMD_PATTERN = re.compile(r"^(saved)?cmd_.*?:=\s*(?P<full_command>.+)$")
+SOURCE_PATTERN = re.compile(r"^source.*?:=\s*(?P<source_file>.+)$")
+
+
+@dataclass
+class CmdFile:
+    cmd_file_path: PathStr
+    savedcmd: str
+    source: PathStr | None = None
+    deps: list[str] = field(default_factory=list[str])
+    make_rules: list[str] = field(default_factory=list[str])
+
+    @classmethod
+    def create(cls, cmd_file_path: PathStr) -> "CmdFile | None":
+        """
+        Parses a .cmd file.
+        .cmd files are assumed to have one of the following structures:
+        1. Full Cmd File
+            (saved)?cmd_<output> := <command>
+            source_<output> := <main_input>
+            deps_<output> := \
+            <dependencies>
+            <output> := $(deps_<output>)
+            $(deps_<output>):
+
+        2. Command Only Cmd File
+            (saved)?cmd_<output> := <command>
+
+        3. Single Dependency Cmd File
+            (saved)?cmd_<output> := <command>
+            <output> := <dependency>
+
+        Args:
+            cmd_file_path (Path): absolute Path to a .cmd file
+
+        Returns:
+            cmd_file (CmdFile): Parsed cmd file.
+        """
+        with open(cmd_file_path, "rt") as f:
+            lines = [line.strip() for line in f.readlines() if line.strip() != "" and not line.startswith("#")]
+
+        # savedcmd
+        match = SAVEDCMD_PATTERN.match(lines[0])
+        if match is None:
+            sbom_logging.error(
+                "Skip parsing '{cmd_file_path}' because no 'savedcmd_' command was found.", cmd_file_path=cmd_file_path
+            )
+            return None
+        savedcmd = match.group("full_command")
+
+        # Command Only Cmd File
+        if len(lines) == 1:
+            return CmdFile(cmd_file_path, savedcmd)
+
+        # Single Dependency Cmd File
+        if len(lines) == 2:
+            dep = lines[1].split(":")[1].strip()
+            return CmdFile(cmd_file_path, savedcmd, deps=[dep])
+
+        # Full Cmd File
+        # source
+        line1 = SOURCE_PATTERN.match(lines[1])
+        if line1 is None:
+            sbom_logging.error(
+                "Skip parsing '{cmd_file_path}' because no 'source_' entry was found.", cmd_file_path=cmd_file_path
+            )
+            return CmdFile(cmd_file_path, savedcmd)
+        source = line1.group("source_file")
+
+        # deps
+        deps: list[str] = []
+        i = 3  # lines[2] includes the variable assignment but no actual dependency, so we need to start at lines[3].
+        while i < len(lines):
+            if not lines[i].endswith("\\"):
+                break
+            deps.append(lines[i][:-1].strip())
+            i += 1
+
+        # make_rules
+        make_rules = lines[i:]
+
+        return CmdFile(cmd_file_path, savedcmd, source, deps, make_rules)
+
+    def get_dependencies(
+        self: "CmdFile", target_path: PathStr, obj_tree: PathStr, fail_on_unknown_build_command: bool
+    ) -> list[PathStr]:
+        """
+        Parses all dependencies required to build a target file from its cmd file.
+
+        Args:
+            target_path: path to the target file relative to `obj_tree`.
+            obj_tree: absolute path to the object tree.
+            fail_on_unknown_build_command: Whether to fail if an unknown build command is encountered.
+
+        Returns:
+            list[PathStr]: dependency file paths relative to `obj_tree`.
+        """
+        input_files: list[PathStr] = [
+            str(p) for p in parse_inputs_from_commands(self.savedcmd, fail_on_unknown_build_command)
+        ]
+        if self.deps:
+            input_files += [str(p) for p in parse_cmd_file_deps(self.deps)]
+        input_files = _expand_resolve_files(input_files, obj_tree)
+
+        cmd_file_dependencies: list[PathStr] = []
+        for input_file in input_files:
+            # input files are either absolute or relative to the object tree
+            if os.path.isabs(input_file):
+                input_file = os.path.relpath(input_file, obj_tree)
+            if input_file == target_path:
+                # Skip target file to prevent cycles. This is necessary because some multi stage commands first create an output and then pass it as input to the next command, e.g., objcopy.
+                continue
+            cmd_file_dependencies.append(input_file)
+
+        return cmd_file_dependencies
+
+
+def _expand_resolve_files(input_files: list[PathStr], obj_tree: PathStr) -> list[PathStr]:
+    """
+    Expands resolve files which may reference additional files via '@' notation.
+
+    Args:
+        input_files (list[PathStr]): List of file paths relative to the object tree, where paths starting with '@' refer to files
+                                     containing further file paths, each on a separate line.
+        obj_tree: Absolute path to the root of the object tree.
+
+    Returns:
+        list[PathStr]: Flattened list of all input file paths, with any nested '@' file references resolved recursively.
+    """
+    expanded_input_files: list[PathStr] = []
+    for input_file in input_files:
+        if not input_file.startswith("@"):
+            expanded_input_files.append(input_file)
+            continue
+        with open(os.path.join(obj_tree, input_file.lstrip("@")), "rt") as f:
+            resolve_file_content = [line_stripped for line in f.readlines() if (line_stripped := line.strip())]
+        expanded_input_files += _expand_resolve_files(resolve_file_content, obj_tree)
+    return expanded_input_files
diff --git a/tools/sbom/sbom/cmd_graph/cmd_graph.py b/tools/sbom/sbom/cmd_graph/cmd_graph.py
new file mode 100644
index 000000000..cad54243f
--- /dev/null
+++ b/tools/sbom/sbom/cmd_graph/cmd_graph.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from collections import deque
+from dataclasses import dataclass, field
+from typing import Iterator
+
+from sbom.cmd_graph.cmd_graph_node import CmdGraphNode, CmdGraphNodeConfig
+from sbom.path_utils import PathStr
+
+
+@dataclass
+class CmdGraph:
+    """Directed acyclic graph of build dependencies primarily inferred from .cmd files produced during kernel builds"""
+
+    roots: list[CmdGraphNode] = field(default_factory=list[CmdGraphNode])
+
+    @classmethod
+    def create(cls, root_paths: list[PathStr], config: CmdGraphNodeConfig) -> "CmdGraph":
+        """
+        Recursively builds a dependency graph starting from `root_paths`.
+        Dependencies are mainly discovered by parsing the `.cmd` files.
+
+        Args:
+            root_paths (list[PathStr]): List of paths to root outputs relative to obj_tree
+            config (CmdGraphNodeConfig): Configuration options
+
+        Returns:
+            CmdGraph: A graph of all build dependencies for the given root files.
+        """
+        node_cache: dict[PathStr, CmdGraphNode] = {}
+        root_nodes = [CmdGraphNode.create(root_path, config, node_cache) for root_path in root_paths]
+        return CmdGraph(root_nodes)
+
+    def __iter__(self) -> Iterator[CmdGraphNode]:
+        """Traverse the graph in breadth-first order, yielding each unique node."""
+        visited: set[PathStr] = set()
+        node_stack: deque[CmdGraphNode] = deque(self.roots)
+        while len(node_stack) > 0:
+            node = node_stack.popleft()
+            if node.absolute_path in visited:
+                continue
+
+            visited.add(node.absolute_path)
+            node_stack.extend(node.children)
+            yield node
diff --git a/tools/sbom/sbom/cmd_graph/cmd_graph_node.py b/tools/sbom/sbom/cmd_graph/cmd_graph_node.py
new file mode 100644
index 000000000..fdaed0f0c
--- /dev/null
+++ b/tools/sbom/sbom/cmd_graph/cmd_graph_node.py
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from itertools import chain
+import logging
+import os
+from typing import Iterator, Protocol
+
+from sbom import sbom_logging
+from sbom.cmd_graph.cmd_file import CmdFile
+from sbom.path_utils import PathStr, is_relative_to
+
+
+@dataclass
+class IncbinDependency:
+    node: "CmdGraphNode"
+    full_statement: str
+
+
+class CmdGraphNodeConfig(Protocol):
+    obj_tree: PathStr
+    src_tree: PathStr
+    fail_on_unknown_build_command: bool
+
+
+@dataclass
+class CmdGraphNode:
+    """A node in the cmd graph representing a single file and its dependencies."""
+
+    absolute_path: PathStr
+    """Absolute path to the file this node represents."""
+
+    cmd_file: CmdFile | None = None
+    """Parsed .cmd file describing how the file at absolute_path was built, or None if not available."""
+
+    cmd_file_dependencies: list["CmdGraphNode"] = field(default_factory=list["CmdGraphNode"])
+    incbin_dependencies: list[IncbinDependency] = field(default_factory=list[IncbinDependency])
+    hardcoded_dependencies: list["CmdGraphNode"] = field(default_factory=list["CmdGraphNode"])
+
+    @property
+    def children(self) -> Iterator["CmdGraphNode"]:
+        seen: set[PathStr] = set()
+        for node in chain(
+            self.cmd_file_dependencies,
+            (dep.node for dep in self.incbin_dependencies),
+            self.hardcoded_dependencies,
+        ):
+            if node.absolute_path not in seen:
+                seen.add(node.absolute_path)
+                yield node
+
+    @classmethod
+    def create(
+        cls,
+        target_path: PathStr,
+        config: CmdGraphNodeConfig,
+        cache: dict[PathStr, "CmdGraphNode"] | None = None,
+        depth: int = 0,
+    ) -> "CmdGraphNode":
+        """
+        Recursively builds a dependency graph starting from `target_path`.
+        Dependencies are mainly discovered by parsing the `.<target_path.name>.cmd` file.
+
+        Args:
+            target_path: Path to the target file relative to obj_tree.
+            config: Config options
+            cache: Tracks processed nodes to prevent cycles.
+            depth: Internal parameter to track the current recursion depth.
+
+        Returns:
+            CmdGraphNode: cmd graph node representing the target file
+        """
+        if cache is None:
+            cache = {}
+
+        target_path_absolute = (
+            os.path.realpath(p)
+            if os.path.islink(p := os.path.join(config.obj_tree, target_path))
+            else os.path.normpath(p)
+        )
+
+        if target_path_absolute in cache:
+            return cache[target_path_absolute]
+
+        if depth == 0:
+            logging.debug(f"Build node: {target_path}")
+
+        cmd_file_path = _to_cmd_path(target_path_absolute)
+        cmd_file = CmdFile.create(cmd_file_path) if os.path.exists(cmd_file_path) else None
+        node = CmdGraphNode(target_path_absolute, cmd_file)
+        cache[target_path_absolute] = node
+
+        if not os.path.exists(target_path_absolute):
+            error_or_warning = (
+                sbom_logging.error
+                if is_relative_to(target_path_absolute, config.obj_tree)
+                or is_relative_to(target_path_absolute, config.src_tree)
+                else sbom_logging.warning
+            )
+            error_or_warning(
+                "Skip parsing '{target_path_absolute}' because file does not exist",
+                target_path_absolute=target_path_absolute,
+            )
+            return node
+
+        if cmd_file is not None:
+            node.cmd_file_dependencies = [
+                CmdGraphNode.create(cmd_file_dependency_path, config, cache, depth + 1)
+                for cmd_file_dependency_path in cmd_file.get_dependencies(
+                    target_path, config.obj_tree, config.fail_on_unknown_build_command
+                )
+            ]
+
+        return node
+
+
+def _to_cmd_path(path: PathStr) -> PathStr:
+    name = os.path.basename(path)
+    return path.removesuffix(name) + f".{name}.cmd"
diff --git a/tools/sbom/sbom/cmd_graph/deps_parser.py b/tools/sbom/sbom/cmd_graph/deps_parser.py
new file mode 100644
index 000000000..fb3ccdd41
--- /dev/null
+++ b/tools/sbom/sbom/cmd_graph/deps_parser.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import re
+import sbom.sbom_logging as sbom_logging
+from sbom.path_utils import PathStr
+
+# Match dependencies on config files
+# Example match: "$(wildcard include/config/CONFIG_SOMETHING)"
+CONFIG_PATTERN = re.compile(r"\$\(wildcard (include/config/[^)]+)\)")
+
+# Match dependencies on the objtool binary
+# Example match: "$(wildcard ./tools/objtool/objtool)"
+OBJTOOL_PATTERN = re.compile(r"\$\(wildcard \./tools/objtool/objtool\)")
+
+# Match any Makefile wildcard reference
+# Example match: "$(wildcard path/to/file)"
+WILDCARD_PATTERN = re.compile(r"\$\(wildcard (?P<path>[^)]+)\)")
+
+# Match ordinary paths:
+# - ^(\/)?: Optionally starts with a '/'
+# - (([\w\-\., ]*)\/)*: Zero or more directory levels
+# - [\w\-\., ]+$: Path component (file or directory)
+# Example matches: "/foo/bar.c", "dir1/dir2/file.txt", "plainfile"
+VALID_PATH_PATTERN = re.compile(r"^(\/)?(([\w\-\., ]*)\/)*[\w\-\., ]+$")
+
+
+def parse_cmd_file_deps(deps: list[str]) -> list[PathStr]:
+    """
+    Parse dependency strings of a .cmd file and return valid input file paths.
+
+    Args:
+        deps: List of dependency strings as found in `.cmd` files.
+
+    Returns:
+        input_files: List of input file paths
+    """
+    input_files: list[PathStr] = []
+    for dep in deps:
+        dep = dep.strip()
+        match dep:
+            case _ if CONFIG_PATTERN.match(dep) or OBJTOOL_PATTERN.match(dep):
+                # config paths like include/config/<CONFIG_NAME> should not be included in the graph
+                continue
+            case _ if match := WILDCARD_PATTERN.match(dep):
+                path = match.group("path")
+                input_files.append(path)
+            case _ if VALID_PATH_PATTERN.match(dep):
+                input_files.append(dep)
+            case _:
+                sbom_logging.error("Skip parsing dependency {dep} because of unrecognized format", dep=dep)
+    return input_files
diff --git a/tools/sbom/sbom/config.py b/tools/sbom/sbom/config.py
index 3dc569ae0..39e556a4c 100644
--- a/tools/sbom/sbom/config.py
+++ b/tools/sbom/sbom/config.py
@@ -3,15 +3,43 @@
 
 import argparse
 from dataclasses import dataclass
+import os
+from typing import Any
+from sbom.path_utils import PathStr
 
 
 @dataclass
 class KernelSbomConfig:
+    src_tree: PathStr
+    """Absolute path to the Linux kernel source directory."""
+
+    obj_tree: PathStr
+    """Absolute path to the build output directory."""
+
+    root_paths: list[PathStr]
+    """List of paths to root outputs (relative to obj_tree) to base the SBOM on."""
+
+    generate_used_files: bool
+    """Whether to generate a flat list of all source files used in the build.
+    If False, no used-files document is created."""
+
+    used_files_file_name: str
+    """If `generate_used_files` is True, specifies the file name for the used-files document."""
+
+    output_directory: PathStr
+    """Path to the directory where the generated output documents will be saved."""
+
     debug: bool
     """Whether to enable debug logging."""
 
+    fail_on_unknown_build_command: bool
+    """Whether to fail if an unknown build command is encountered in a .cmd file."""
+
+    write_output_on_error: bool
+    """Whether to write output documents even if errors occur."""
+
 
-def _parse_cli_arguments() -> dict[str, bool]:
+def _parse_cli_arguments() -> dict[str, Any]:
     """
     Parse command-line arguments using argparse.
 
@@ -19,8 +47,49 @@ def _parse_cli_arguments() -> dict[str, bool]:
         Dictionary of parsed arguments.
     """
     parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter,
         description="Generate SPDX SBOM documents for kernel builds",
     )
+    parser.add_argument(
+        "--src-tree",
+        default="../linux",
+        help="Path to the kernel source tree (default: ../linux)",
+    )
+    parser.add_argument(
+        "--obj-tree",
+        default="../linux/kernel_build",
+        help="Path to the build output directory (default: ../linux/kernel_build)",
+    )
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument(
+        "--roots",
+        nargs="+",
+        default="arch/x86/boot/bzImage",
+        help="Space-separated list of paths relative to obj-tree for which the SBOM will be created.\n"
+        "Cannot be used together with --roots-file. (default: arch/x86/boot/bzImage)",
+    )
+    group.add_argument(
+        "--roots-file",
+        help="Path to a file containing the root paths (one per line). Cannot be used together with --roots.",
+    )
+    parser.add_argument(
+        "--generate-used-files",
+        action="store_true",
+        default=False,
+        help=(
+            "Whether to create the sbom.used-files.txt file, a flat list of all "
+            "source files used for the kernel build.\n"
+            "If src-tree and obj-tree are equal it is not possible to reliably "
+            "classify source files.\n"
+            "In this case sbom.used-files.txt will contain all files used for the "
+            "kernel build including all build artifacts. (default: False)"
+        ),
+    )
+    parser.add_argument(
+        "--output-directory",
+        default=".",
+        help="Path to the directory where the generated output documents will be stored (default: .)",
+    )
     parser.add_argument(
         "--debug",
         action="store_true",
@@ -28,6 +97,28 @@ def _parse_cli_arguments() -> dict[str, bool]:
         help="Enable debug logs (default: False)",
     )
 
+    # Error handling settings
+    parser.add_argument(
+        "--do-not-fail-on-unknown-build-command",
+        action="store_true",
+        default=False,
+        help=(
+            "Whether to fail if an unknown build command is encountered in a .cmd file.\n"
+            "If set to True, errors are logged as warnings instead. (default: False)"
+        ),
+    )
+    parser.add_argument(
+        "--write-output-on-error",
+        action="store_true",
+        default=False,
+        help=(
+            "Write output documents even if errors occur. The resulting documents "
+            "may be incomplete.\n"
+            "A summary of warnings and errors can be found in the 'comment' property "
+            "of the CreationInfo element. (default: False)"
+        ),
+    )
+
     args = vars(parser.parse_args())
     return args
 
@@ -42,6 +133,58 @@ def get_config() -> KernelSbomConfig:
     # Parse cli arguments
     args = _parse_cli_arguments()
 
+    # Extract and validate cli arguments
+    src_tree = os.path.realpath(args["src_tree"])
+    obj_tree = os.path.realpath(args["obj_tree"])
+    root_paths = []
+    if args["roots_file"]:
+        with open(args["roots_file"], "rt") as f:
+            root_paths = [root.strip() for root in f.readlines()]
+    else:
+        root_paths = args["roots"]
+    _validate_path_arguments(src_tree, obj_tree, root_paths)
+
+    generate_used_files = args["generate_used_files"]
+    output_directory = os.path.realpath(args["output_directory"])
     debug = args["debug"]
 
-    return KernelSbomConfig(debug=debug)
+    fail_on_unknown_build_command = not args["do_not_fail_on_unknown_build_command"]
+    write_output_on_error = args["write_output_on_error"]
+
+    # Hardcoded config
+    used_files_file_name = "sbom.used-files.txt"
+
+    return KernelSbomConfig(
+        src_tree=src_tree,
+        obj_tree=obj_tree,
+        root_paths=root_paths,
+        generate_used_files=generate_used_files,
+        used_files_file_name=used_files_file_name,
+        output_directory=output_directory,
+        debug=debug,
+        fail_on_unknown_build_command=fail_on_unknown_build_command,
+        write_output_on_error=write_output_on_error,
+    )
+
+
+def _validate_path_arguments(src_tree: PathStr, obj_tree: PathStr, root_paths: list[PathStr]) -> None:
+    """
+    Validate that the provided paths exist.
+
+    Args:
+        src_tree: Absolute path to the source tree.
+        obj_tree: Absolute path to the object tree.
+        root_paths: List of root paths relative to obj_tree.
+
+    Raises:
+        argparse.ArgumentTypeError: If any of the paths don't exist.
+    """
+    if not os.path.exists(src_tree):
+        raise argparse.ArgumentTypeError(f"--src-tree {src_tree} does not exist")
+    if not os.path.exists(obj_tree):
+        raise argparse.ArgumentTypeError(f"--obj-tree {obj_tree} does not exist")
+    for root_path in root_paths:
+        if not os.path.exists(os.path.join(obj_tree, root_path)):
+            raise argparse.ArgumentTypeError(
+                f"path to root artifact {os.path.join(obj_tree, root_path)} does not exist"
+            )
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ