csv_parse = require "csv-parse/sync"
csv_stringify = require "csv-stringify/sync"
fs = require "fs"
hanzi_tools = require "hanzi-tools"
wanakana = require "wanakana"
xml2js = require "xml2js"
coffee = require "coffeescript"
array_from_newline_file = (path) -> fs.readFileSync(path).toString().trim().split("\n")
object_array_add = (object, key, value) -> if object[key] then object[key].push value else object[key] = [value]
object_from_json_file = (path) -> JSON.parse(fs.readFileSync(path))
on_error = (a) -> if a then console.error a
read_csv_file = (path, delimiter) -> csv_parse.parse fs.readFileSync(path, "utf-8"), {delimiter: delimiter || " ", relax_column_count: true}
read_text_file = (a) -> fs.readFileSync a, "utf8"
write_text_file = (path, a) -> fs.writeFileSync path, a
delete_duplicates = (a) -> [...new Set(a)]
replace_placeholders = (text, mapping) -> text.replace /__(.*?)__/g, (_, k) -> mapping[k] or ""

write_csv_file = (path, data) ->
  csv = csv_stringify.stringify(data, {delimiter: " "}, on_error).trim()
  fs.writeFile path, csv, on_error

is_object = (a) ->
  type = typeof a
  type == "function" || type == "object" && !!a

object_tree_foreach = (a, f) ->
  Object.keys(a).forEach (key) ->
    value = a[key]
    if is_object value
      object_tree_foreach value, f
      f key, value
    else f key, value, a

deduplicate_readings = (a) ->
  # yoku/a-biru/a-biseru -> yoku/a
  a = a.split("/")
  a = a.map (a) -> a.split("-")[0].replace("'", "")
  a = a.filter (a, i, self) -> self.indexOf(a) == i
  a.join("/")

update_dictionary_character_data = () ->
  xml_parser = new xml2js.Parser()
  find_paths = (node) ->
    d_values = []
    if node?.path
      for path in node.path
        d_values.push path['$'].d
    for key, value of node
      if typeof value is 'object'
        d_values = d_values.concat find_paths value
    d_values
  get_svg = (char) ->
    filename = "0" + char.charCodeAt(0).toString(16) + ".svg"
    path = "data/kanjivg/#{filename}"
    new Promise (resolve, reject) ->
      fs.readFile path, "utf8", (error, xml) ->
        if error
          console.error error.toString(), "for character #{char}"
          resolve false
        else
          xml_parser.parseString xml, (error, result) ->
            if error then resolve false
            else resolve find_paths result.svg
  kanji = read_csv_file "data/jouyou-kanji.csv"
  # load all svg files asynchronously
  result_promises = kanji.map (a) ->
    [char, meaning, readings] = a
    readings = deduplicate_readings readings
    new Promise (resolve, reject) ->
      get_svg(a[0]).then (svg) -> resolve [char, meaning, readings, svg]
  Promise.all(result_promises).then (result) ->
    write_text_file "data/dictionary-character-data.json", JSON.stringify(result)

update_dictionary_word_data = () ->
  result = []
  dictionary = object_from_json_file "data/jmdict-translations-dictionary.json"
  by_reading = {}
  for word in Object.keys dictionary
    for b in dictionary[word]
      object_array_add by_reading, b[0], word
  words = array_from_newline_file "data/wordlex-2011.txt"
  words = words.filter (a) -> (a.length > 1 || !wanakana.isHiragana(a)) && !wanakana.isKatakana(a)
  for word in words
    entries = dictionary[word] || by_reading[word]
    continue unless entries
    for entry in entries
      romaji = wanakana.toRomaji entry[0]
      meanings = entry[1]
      if Array.isArray meanings
        result.push [word, romaji, meanings]
  write_text_file "data/dictionary-word-data.json", JSON.stringify(result)

update_dictionary_data = () ->
  update_dictionary_character_data()
  update_dictionary_word_data()

update_dictionary = () ->
  character_data = read_text_file "data/dictionary-character-data.json"
  word_data = read_text_file "data/dictionary-word-data.json"
  font = read_text_file "src/NotoSansJP-Light.ttf.base64"
  script = read_text_file "src/dictionary.coffee"
  script = replace_placeholders script, {character_data, word_data}
  script = coffee.compile(script, bare: true).trim()
  html = read_text_file "src/nihongo-dictionary-template.html"
  html = replace_placeholders html, {script, font}
  write_text_file "compiled/nihongo-dictionary.html", html

ja_overlap = () ->
  ja_dict = {}
  ja_words = JSON.parse fs.readFileSync "data/dictionary-word-data.json"
  for a in ja_words
    object_array_add ja_dict, a[0], a
  cn_dict = {}
  # depends on https://github.com/sph-mn/hanyu
  cn_words = read_csv_file "../hanyu/data/cedict.csv"
  for a in cn_words
    object_array_add cn_dict, a[0], a
  shared = []
  for a in Object.keys cn_dict
    ja = ja_dict[hanzi_tools.traditionalize a]
    continue unless ja
    cn = cn_dict[a]
    readings = ja.map((b) -> b[1]).join("/")
    shared.push [a, cn[0][1], ja[0][0], readings]
  write_csv_file "data/chinese-japanese-overlap.csv", shared

pinyin_to_alphanumeric_ascii = (a) ->
  pinyin_map =
    "ā": "a", "á": "a", "ǎ": "a", "à": "a",
    "ē": "e", "é": "e", "ě": "e", "è": "e",
    "ī": "i", "í": "i", "ǐ": "i", "ì": "i",
    "ō": "o", "ó": "o", "ǒ": "o", "ò": "o",
    "ū": "u", "ú": "u", "ǔ": "u", "ù": "u",
    "ǖ": "u", "ǘ": "u", "ǚ": "u", "ǜ": "u",
    "ü": "u", "1": "", "2": "", "3": "", "4": "", "5": "",
  a.split("").map((a) -> if a of pinyin_map then pinyin_map[a] else a).join("")

sorensen_dice = (a, b) ->
  a = new Set a
  b = new Set b
  intersection = new Set([...a].filter (c) -> b.has c)
  (2 * intersection.size) / (a.size + b.size)

sort_ja_overlap_by_similarity = () ->
  a = read_csv_file "data/chinese-japanese-overlap.csv"
  c = for b in a
    cn = pinyin_to_alphanumeric_ascii b[1]
    ja = delete_duplicates b[3].split "/"
    ja = ja.map((a) -> [a, sorensen_dice(cn, a)]).sort((a, b) -> b[1] - a[1])
    b[3] = ja.map((a) -> a[0]).join "/"
    [ja[0][1]].concat b
  c = c.sort((a, b) -> (a[1].length - b[1].length) || (b[0] - a[0]))
  a = (b.slice(1) for b in c)
  write_csv_file "data/chinese-japanese-overlap.csv", a

update_ja_overlap = () ->
  ja_overlap()
  sort_ja_overlap_by_similarity()

run = () ->

module.exports = {
  update_dictionary_data,
  update_dictionary,
  update_ja_overlap,
  run
}