Message-ID: <20260111212915.195056-7-sashal@kernel.org>
Date: Sun, 11 Jan 2026 16:29:14 -0500
From: Sasha Levin <sashal@...nel.org>
To: tools@...nel.org
Cc: linux-kernel@...r.kernel.org,
torvalds@...ux-foundation.org,
broonie@...nel.org,
Sasha Levin <sashal@...nel.org>
Subject: [RFC v2 6/7] LLMinus: Add prompt token limit enforcement

Add a --max-tokens option (default: 100K) to prevent prompt overflow
errors with various LLM providers. The token count is estimated at
roughly 4 characters per token. When a prompt exceeds the limit, RAG
examples are progressively dropped, least similar first, until the
prompt fits. The estimated token count is displayed in the invoke
output.
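
For example (illustrative values; the command string is the placeholder
used in the test suite, and the message-id is hypothetical):

  llminus resolve "my-llm --model fancy" --max-tokens 50000
  llminus pull <message-id> --max-tokens 50000
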
Signed-off-by: Sasha Levin <sashal@...nel.org>
---
tools/llminus/src/main.rs | 114 +++++++++++++++++++++++++++++++-------
 1 file changed, 95 insertions(+), 19 deletions(-)

diff --git a/tools/llminus/src/main.rs b/tools/llminus/src/main.rs
index ff668244688f..5c469e23f09a 100644
--- a/tools/llminus/src/main.rs
+++ b/tools/llminus/src/main.rs
@@ -14,6 +14,18 @@
const STORE_PATH: &str = ".llminus-resolutions.json";
+/// Default maximum tokens for prompt (conservative for broad provider compatibility)
+/// Most providers support at least 128K; we use 100K as a safe default.
+const DEFAULT_MAX_TOKENS: usize = 100_000;
+
+/// Approximate characters per token (for English text)
+const CHARS_PER_TOKEN: usize = 4;
+
+/// Estimate the number of tokens in a text string
+fn estimate_tokens(text: &str) -> usize {
+ text.len() / CHARS_PER_TOKEN
+}
+
#[derive(Parser)]
#[command(name = "llminus")]
#[command(about = "LLM-powered git conflict resolution tool")]
@@ -45,6 +57,9 @@ enum Commands {
Resolve {
/// Command to invoke. The prompt will be passed via stdin.
command: String,
+ /// Maximum tokens for prompt (reduces RAG examples if exceeded)
+ #[arg(short, long, default_value_t = DEFAULT_MAX_TOKENS)]
+ max_tokens: usize,
},
/// Pull a kernel patch/pull request from lore.kernel.org and merge it
Pull {
@@ -53,6 +68,9 @@ enum Commands {
/// Command to invoke for LLM assistance
#[arg(short, long, default_value = "llm")]
command: String,
+ /// Maximum tokens for prompt (reduces RAG examples if exceeded)
+ #[arg(long, default_value_t = DEFAULT_MAX_TOKENS)]
+ max_tokens: usize,
},
}
@@ -825,6 +843,7 @@ fn parse_conflict_file(path: &str) -> Result<Vec<ConflictFile>> {
}
/// Result of a similarity search
+#[derive(Clone)]
struct SimilarResolution {
resolution: MergeResolution,
similarity: f32,
@@ -1632,7 +1651,7 @@ fn build_resolve_prompt(
prompt
}
-fn resolve(command: &str) -> Result<()> {
+fn resolve(command: &str, max_tokens: usize) -> Result<()> {
// Get merge context (what branch/tag is being merged)
let merge_ctx = get_merge_context();
if let Some(ref source) = merge_ctx.merge_source {
@@ -1649,17 +1668,45 @@ fn resolve(command: &str) -> Result<()> {
// Try to find similar historical resolutions (gracefully handles missing database)
println!("Looking for similar historical conflicts...");
- let similar = try_find_similar_resolutions(3, &conflicts);
+ let all_similar = try_find_similar_resolutions(3, &conflicts);
- if similar.is_empty() {
+ if all_similar.is_empty() {
println!("No historical resolution database found (run 'llminus learn' and 'llminus vectorize' to build one)");
println!("Proceeding without historical examples...");
} else {
- println!("Found {} similar historical resolutions", similar.len());
+ println!("Found {} similar historical resolutions", all_similar.len());
+ }
+
+ // Build the prompt with adaptive RAG example reduction
+ let mut similar = all_similar.clone();
+ let mut prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
+ let mut tokens = estimate_tokens(&prompt);
+
+ // Reduce RAG examples until we're under the token limit
+ while tokens > max_tokens && !similar.is_empty() {
+ let original_count = all_similar.len();
+ similar.pop(); // Remove the least similar (last) example
+ prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
+ tokens = estimate_tokens(&prompt);
+
+ if similar.len() < original_count {
+ println!(
+ "Reduced RAG examples from {} to {} to fit token limit (~{} tokens, limit: {})",
+ original_count,
+ similar.len(),
+ tokens,
+ max_tokens
+ );
+ }
+ }
+
+ if tokens > max_tokens {
+ println!(
+ "Warning: Prompt still exceeds token limit (~{} tokens, limit: {}) even without RAG examples",
+ tokens, max_tokens
+ );
}
- // Build the prompt and invoke LLM
- let prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, None);
invoke_llm(command, &prompt)
}
@@ -1668,7 +1715,8 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<()> {
use std::io::Write;
use std::process::Stdio;
- println!("Invoking: {} (prompt: {} bytes)", command, prompt.len());
+ let tokens = estimate_tokens(prompt);
+ println!("Invoking: {} (prompt: {} bytes, ~{} tokens)", command, prompt.len(), tokens);
println!("{}", "=".repeat(80));
// Parse command (handle arguments)
@@ -1708,7 +1756,7 @@ fn invoke_llm(command: &str, prompt: &str) -> Result<()> {
}
/// Pull a kernel pull request from lore.kernel.org
-fn pull(message_id: &str, command: &str) -> Result<()> {
+fn pull(message_id: &str, command: &str, max_tokens: usize) -> Result<()> {
check_repo()?;
// Step 1: Fetch and parse the pull request email
@@ -1744,16 +1792,44 @@ fn pull(message_id: &str, command: &str) -> Result<()> {
// Try to find similar historical resolutions
println!("Looking for similar historical conflicts...");
- let similar = try_find_similar_resolutions(3, &conflicts);
+ let all_similar = try_find_similar_resolutions(3, &conflicts);
- if similar.is_empty() {
+ if all_similar.is_empty() {
println!("No historical resolution database found (this is optional)");
} else {
- println!("Found {} similar historical resolutions", similar.len());
+ println!("Found {} similar historical resolutions", all_similar.len());
+ }
+
+ // Build the prompt with adaptive RAG example reduction
+ let mut similar = all_similar.clone();
+ let mut prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
+ let mut tokens = estimate_tokens(&prompt);
+
+ // Reduce RAG examples until we're under the token limit
+ while tokens > max_tokens && !similar.is_empty() {
+ let original_count = all_similar.len();
+ similar.pop(); // Remove the least similar (last) example
+ prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
+ tokens = estimate_tokens(&prompt);
+
+ if similar.len() < original_count {
+ println!(
+ "Reduced RAG examples from {} to {} to fit token limit (~{} tokens, limit: {})",
+ original_count,
+ similar.len(),
+ tokens,
+ max_tokens
+ );
+ }
+ }
+
+ if tokens > max_tokens {
+ println!(
+ "Warning: Prompt still exceeds token limit (~{} tokens, limit: {}) even without RAG examples",
+ tokens, max_tokens
+ );
}
- // Build the prompt with pull request context and invoke LLM
- let prompt = build_resolve_prompt(&conflicts, &similar, &merge_ctx, Some(&pull_req));
println!("\n=== Invoking LLM for Conflict Resolution ===");
invoke_llm(command, &prompt)?;
@@ -1815,8 +1891,8 @@ fn main() -> Result<()> {
Commands::Learn { range } => learn(range.as_deref()),
Commands::Vectorize { batch_size } => vectorize(batch_size),
Commands::Find { n } => find(n),
- Commands::Resolve { command } => resolve(&command),
- Commands::Pull { message_id, command } => pull(&message_id, &command),
+ Commands::Resolve { command, max_tokens } => resolve(&command, max_tokens),
+ Commands::Pull { message_id, command, max_tokens } => pull(&message_id, &command, max_tokens),
}
}
@@ -1890,7 +1966,7 @@ fn test_find_command_with_n() {
fn test_resolve_command_parses() {
let cli = Cli::try_parse_from(["llminus", "resolve", "my-llm"]).unwrap();
match cli.command {
- Commands::Resolve { command } => assert_eq!(command, "my-llm"),
+ Commands::Resolve { command, .. } => assert_eq!(command, "my-llm"),
_ => panic!("Expected Resolve command"),
}
}
@@ -1899,7 +1975,7 @@ fn test_resolve_command_parses() {
fn test_resolve_command_with_args() {
let cli = Cli::try_parse_from(["llminus", "resolve", "my-llm --model fancy"]).unwrap();
match cli.command {
- Commands::Resolve { command } => assert_eq!(command, "my-llm --model fancy"),
+ Commands::Resolve { command, .. } => assert_eq!(command, "my-llm --model fancy"),
_ => panic!("Expected Resolve command"),
}
}
@@ -2210,7 +2286,7 @@ fn test_parse_multiple_conflicts() {
fn test_pull_command_parses() {
let cli = Cli::try_parse_from(["llminus", "pull", "test@...nel.org"]).unwrap();
match cli.command {
- Commands::Pull { message_id, command } => {
+ Commands::Pull { message_id, command, .. } => {
assert_eq!(message_id, "test@...nel.org");
assert_eq!(command, "llm"); // default
}
@@ -2224,7 +2300,7 @@ fn test_pull_command_with_custom_command() {
"llminus", "pull", "test@...nel.org", "-c", "my-llm --model fancy"
]).unwrap();
match cli.command {
- Commands::Pull { message_id, command } => {
+ Commands::Pull { message_id, command, .. } => {
assert_eq!(message_id, "test@...nel.org");
assert_eq!(command, "my-llm --model fancy");
}
--
2.51.0