Knuth Morris Pratt Algorithm
- Pattern matching algo
- 2 parts
- LPS
- Pattern matching
- Time , Space
- Prefix suffix
- Eg "abc"
- Proper prefix=["", "a", "ab"]
- suffix = ["", "c", "bc", "abc"]
- Eg "abc"
LPS
- Longest prefix suffix
- Time:
- Examples:
- s = "ababc"; lps = [0, 0, 1, 2, 0]
- s = "aaaa"; lps = [0, 1, 2, 3]
- s = "abacabad"; lps = [0, 0, 1, 0, 1, 2, 3, 0]
- Basic Idea: (len = lps[i-1])
- if str[len] == str[i], then lps[i] = len + 1
- if str[len] != str[i]
- if len == 0, then lps[i] = 0
- else, len = lps[len - 1], compare str[i] and str[len]
- Python
def getLPS(s, patternLength):
lps = [0] * patternLength
cL = 0 # len
i = 1
while i < len(s):
if s[i] == s[cL]:
cL += 1
lps[i] = cL
i += 1
else:
if cL == 0:
lps[i] = 0
i += 1
else:
cL = lps[cL - 1]
return lps
KMP
- Python
def KMP(s, pattern):
N, M = len(s), len(pattern)
lps = getLPS(s, M)
i, j = 0, 0
res = []
while i < N:
if s[i] == pattern[j]:
i += 1
j += 1
if j == M:
res.append(i - M)
j = lps[j - 1]
elif i < N and s[i] != pattern[j]:
if j == 0:
i += 1
else:
j = lps[j - 1]