# aho-corasick substring search algorithm # links * [algorithms for competitive programming](https://cp-algorithms.com/string/aho_corasick.html) * [c implementation](https://github.com/dongyx/libaca/tree/main) in less than 175 lines * [c implementation](https://github.com/farhiongit/aho-corasick-1975/blob/master/aho_corasick_template_impl.h) using the preprocessor and generic typing * [c implementation](https://github.com/mischasan/aho-corasick) using an interleaved state transition matrix * [coordinate hash trie](https://github.com/dongyx/chtrie/tree/main) * [wikipedia](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm) # minimal implementation in coffeescript ~~~ # create a trie node make_node = -> children: {}, fail: null, outputs: [] # build the Aho-Corasick automaton build_automaton = (keywords) -> root = make_node() # insert keywords into the trie for kw in keywords node = root node = node.children[ch] ?= make_node() for ch in kw node.outputs.push kw # set initial failure links (depth 1 children -> root) queue = (child for _, child of root.children) child.fail = root for child in queue # build failure links using BFS while queue.length node = queue.shift() for ch, child of node.children f = node.fail while f and not f.children[ch] f = f.fail child.fail = if f then f.children[ch] else root # merge outputs from fail link child.outputs = child.outputs.concat child.fail.outputs queue.push child # return a function to search text (text) -> node = root res = [] for i in [0...text.length] ch = text[i] while node and not node.children[ch] node = node.fail node = if node then node.children[ch] else root # record matches ending at position i res.push [i - kw.length + 1, kw] for kw in node.outputs res ~~~ example usage ~~~ search = build_automaton ["he", "hers", "she", "his"] console.log search "ushers" ~~~ the content of `root` as json ~~~ { "children": { "h": { "children": { "e": { "children": { "r": { "children": { "s": { "children": {}, "fail": { "children": { "h": { "children": { "e": { "children": {}, "outputs": [ "she", "he" ] } }, "outputs": [] } }, "outputs": [] }, "outputs": [ "hers" ] } }, "outputs": [] } }, "outputs": [ "he" ] }, "i": { "children": { "s": { "children": {}, "outputs": [ "his" ] } }, "outputs": [] } }, "outputs": [] } }, "fail": null, "outputs": [] } ~~~