[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20081218215308.30730.qmail@science.horizon.com>
Date: Thu, 18 Dec 2008 16:53:08 -0500
From: "George Spelvin" <linux@...izon.com>
To: linux@...izon.com, cdahlin@...hat.com
Cc: tj@...nel.org, srostedt@...hat.com, peterz@...radead.org,
linux-kernel@...r.kernel.org, andi@...stfloor.org
Subject: Re: [RFC] globmatch() helper function
Casey Dahlin <cdahlin@...hat.com> wrote:
> George Spelvin wrote:
>> Eureka!
>>
>> Never mind all of this angst; I've figured out a non-recursive way
>> to do it. Thanks for pushing me to think a bit harder and find it.
>
> Yours is better, but I may as well post my solution:
Interesting. You can see how complex that gets.
Here's my solution, currently a stand-alone executable:
#include <stdbool.h>
#include <string.h> /* For strchr */
#include <assert.h>
#define WARN_ON(x) assert(!(x))
#ifndef __pure
/* Kernel compatibility #define */
#define __pure __attribute__((pure))
#endif
/**
* is_in_class - globmatch() helper, returns true if character is in class.
* @c: Character to be tested.
* @pat: Character class to test for inclusion in, terminated by ']'.
*
* Evaluate the "body" of a character class. Given the class "[!a-z]",
* this expects @pat to point to the a, and proceeds until the closing ].
* The caller must skip the opening bracket and the complement character.
*
* The caller must verify that a terminating ] exists; this will NOT
* stop at a trailing nul byte. Also, the first character may be ];
* that is not taken as a terminator.
*
* Comparison is done using unsigned bytes, 0...255.
*/
static bool __pure
is_in_class(unsigned char c, char const *pat)
{
/*
* Iterate over each span in the character class.
* a span is either a single character x, or a
* range x-y.
*/
do {
unsigned char c1 = *pat++, c2 = c1;
if (pat[0] == '-' && pat[1] != ']') {
c2 = pat[1];
pat += 2;
}
/* Any special action if c1 > c2? */
if (c1 <= c && c <= c2)
return true;
} while (*pat != ']');
return false;
}
/**
* globmatch - Shell-style pattern matching, like !fnmatch(pat, str, 0)
* @pat: Pattern to match. Metacharacters are ?, *, [ and \.
* @str: String to match. the pattern must match the entire string.
* @maxdepth: Maximum number of (non-trailing) * permitted in pattern.
*
* Perform shell-style glob matching, returning true if it matches.
*
* Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
* treat / or leading . specially; it isn't actually used for pathnames.
*
* This is small and simple implementation intended for device blacklists
* where a string is matched against a number of patterns. Thus,
* it does not proprocess the patterns. Run-time is at most quadratic
* in strlen(@str).
*/
static bool __pure
globmatch(char const *pat, char const *str)
{
/*
* Backtrack to previous * on mismatch nad retry starting one
* character later in the string. It can be proved that there's
* never a need to backtrack multiple levels.
*/
char const *back_pat = 0, *back_str = back_str;
/*
* Loop over each token (character or class) in pat, matching
* it against the remaining unmatched tail of str. Return false
* on mismatch, or true after matching the trailing nul bytes.
*/
do {
/*
* (To handle * properly, which may match zero bytes, each
* case is required to increment the str pointer itself.)
*/
switch (pat[0]) {
char c;
case '?': /* Wildcard: anything but nul */
if (!*str++)
return false;
break;
case '*': /* Any-length wildcard */
/* This doesn't loop... */
if (pat[1] == '\0') /* Optimize trailing * case */
return true;
back_pat = pat;
back_str = str;
break;
case '[': { /* Character class */
/* Is it an inverted character class? */
bool inv = (pat[1] == '^' || pat[1] == '!');
/* If no terminating ], interpret as plain text. */
char const *end = strchr(pat+2+inv, ']');
if (!end)
goto def; /* Or return -1 for malformed pattern? */
pat += 1 + inv;
c = *str++;
if (is_in_class(c, pat) == inv)
goto backtrack;
pat = end;
}
break;
case '\\':
pat++;
/* FALLLTHROUGH */
default: /* Literal character */
def:
c = *str++;
if (*pat == c)
break;
backtrack:
if (c == '\0' || !back_pat)
return false; /* No point continuing */
/* Try again from last *, one character later in str. */
pat = back_pat;
str = ++back_str;
break;
}
} while (*pat++);
return true;
}
/* Test code */
#include <stdio.h>
#include <stdlib.h>
static void
test(char const *pat, char const *str, bool expected)
{
bool match = globmatch(pat, str);
printf("\"%s\" vs. \"%s\": %s %s\n", pat, str, match ? "match" : "mismatch",
match == expected ? "OK" : "*** ERROR ***");
if (match != expected)
exit(1);
}
int
main(void)
{
test("a", "a", true);
test("a", "b", false);
test("a", "aa", false);
test("a", "", false);
test("", "", true);
test("", "a", false);
test("?", "a", true);
test("?", "aa", false);
test("??", "a", false);
test("?x?", "axb", true);
test("?x?", "abx", false);
test("?x?", "xab", false);
test("*??", "a", false);
test("*??", "ab", true);
test("*??", "abc", true);
test("??*", "a", false);
test("??*", "ab", true);
test("??*", "abc", true);
test("?*?", "a", false);
test("?*?", "ab", true);
test("?*?", "abc", true);
test("*b", "b", true);
test("*b", "ab", true);
test("*b", "aab", true);
test("*bc", "abbc", true);
test("*bc", "bc", true);
test("*bc", "bbc", true);
test("*ac*", "abacadaeafag", true);
test("*ac*ae*ag*", "abacadaeafag", true);
test("*a*b*[bc]*[ef]*g*", "abacadaeafag", true);
test("*a*b*[ef]*[cd]*g*", "abacadaeafag", false);
test("*abcd*", "abcabcabcabcdefg", true);
test("*ab*cd*", "abcabcabcabcdefg", true);
test("*abcd*abcdef*", "abcabcdabcdeabcdefg", true);
test("*abcd*", "abcabcabcabcefg", false);
test("*ab*cd*", "abcabcabcabcefg", false);
test("[a]", "a", true);
test("[^a]", "a", false);
test("[!a]", "a", false);
test("[!a]", "b", true);
test("[ab]", "a", true);
test("[ab]", "b", true);
test("[ab]", "c", false);
test("[^ab]", "c", true);
test("[a-c]", "b", true);
test("[a-c]", "d", false);
test("[]a-ceg-ik[]", "a", true);
test("[]a-ceg-ik[]", "]", true);
test("[]a-ceg-ik[]", "[", true);
test("[]a-ceg-ik[]", "h", true);
test("[]a-ceg-ik[]", "f", false);
test("[!]a-ceg-ik[]", "h", false);
test("[!]a-ceg-ik[]", "]", false);
test("[!]a-ceg-ik[]", "f", true);
test("[!]a-ceg-ik[]", "f", true);
return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists