27 Jun 2009 (updated 28 Jun 2009 at 08:00 UTC)
»
Here's my python implementation of Rob Pike's minimal regex
(only supports ^.*$
special characters)
algorithm:
def match(regexp, text):
if regexp.startswith("^"):
return
matchhere(regexp[1:], text)
for i in range(len(text) or 1):
if
matchhere(regexp, text[i:]):
return True
return False
def matchhere(regexp, text):
if len(regexp) == 0:
return
True
if regexp[1:].startswith("*"):
return
matchstar(regexp[0], regexp[2:], text)
if regexp == "$":
return
len(text) == 0
if len(text) > 0 and (regexp[0] ==
"." or regexp[0] == text[0]):
return
matchhere(regexp[1:], text[1:])
return False
def matchstar(c, regexp, text):
for i in range(len(text) or 1):
if
matchhere(regexp, text[i:]):
return True
if len(text)
< 0 or c not in (text[i], "."):
return False
import unittest
class Test(unittest.TestCase):
def test(self):
self.assert_
(match("a", "a"))
self.assert_
(not match("a", "b"))
self.assert_
(match("^a$", "a"))
self.assert_
(match("^a*b$", "aaaab"))
self.assert_
(not match("^a*b$", "aaacb"))
self.assert_
(match("a*a", "aa"))
Here's hoping there's no other major bugs.