# create a trie node
make_node = -> children: {}, fail: null, outputs: []
# build the Aho-Corasick automaton
build_automaton = (keywords) ->
root = make_node()
# insert keywords into the trie
for kw in keywords
node = root
node = node.children[ch] ?= make_node() for ch in kw
node.outputs.push kw
# set initial failure links (depth 1 children -> root)
queue = (child for _, child of root.children)
child.fail = root for child in queue
# build failure links using BFS
while queue.length
node = queue.shift()
for ch, child of node.children
f = node.fail
while f and not f.children[ch]
f = f.fail
child.fail = if f then f.children[ch] else root
# merge outputs from fail link
child.outputs = child.outputs.concat child.fail.outputs
queue.push child
# return a function to search text
(text) ->
node = root
res = []
for i in [0...text.length]
ch = text[i]
while node and not node.children[ch]
node = node.fail
node = if node then node.children[ch] else root
# record matches ending at position i
res.push [i - kw.length + 1, kw] for kw in node.outputs
resexample usage
search = build_automaton ["he", "hers", "she", "his"] console.log search "ushers"
the content of root as json
{
"children": {
"h": {
"children": {
"e": {
"children": {
"r": {
"children": {
"s": {
"children": {},
"fail": {
"children": {
"h": {
"children": {
"e": {
"children": {},
"outputs": [
"she",
"he"
]
}
},
"outputs": []
}
},
"outputs": []
},
"outputs": [
"hers"
]
}
},
"outputs": []
}
},
"outputs": [
"he"
]
},
"i": {
"children": {
"s": {
"children": {},
"outputs": [
"his"
]
}
},
"outputs": []
}
},
"outputs": []
}
},
"fail": null,
"outputs": []
}