# create a trie node make_node = -> children: {}, fail: null, outputs: [] # build the Aho-Corasick automaton build_automaton = (keywords) -> root = make_node() # insert keywords into the trie for kw in keywords node = root node = node.children[ch] ?= make_node() for ch in kw node.outputs.push kw # set initial failure links (depth 1 children -> root) queue = (child for _, child of root.children) child.fail = root for child in queue # build failure links using BFS while queue.length node = queue.shift() for ch, child of node.children f = node.fail while f and not f.children[ch] f = f.fail child.fail = if f then f.children[ch] else root # merge outputs from fail link child.outputs = child.outputs.concat child.fail.outputs queue.push child # return a function to search text (text) -> node = root res = [] for i in [0...text.length] ch = text[i] while node and not node.children[ch] node = node.fail node = if node then node.children[ch] else root # record matches ending at position i res.push [i - kw.length + 1, kw] for kw in node.outputs res
example usage
search = build_automaton ["he", "hers", "she", "his"] console.log search "ushers"
the content of root
as json
{ "children": { "h": { "children": { "e": { "children": { "r": { "children": { "s": { "children": {}, "fail": { "children": { "h": { "children": { "e": { "children": {}, "outputs": [ "she", "he" ] } }, "outputs": [] } }, "outputs": [] }, "outputs": [ "hers" ] } }, "outputs": [] } }, "outputs": [ "he" ] }, "i": { "children": { "s": { "children": {}, "outputs": [ "his" ] } }, "outputs": [] } }, "outputs": [] } }, "fail": null, "outputs": [] }