# Maintenance tasks for Chinese character data: scraping stroke counts and
# ideographic decompositions from Wiktionary and rebuilding the CSV files
# under data/. The requires below other than "https" are assumptions; the
# original section omits them. Helpers such as read_csv_file, write_csv_file,
# split_chars, delete_duplicates, pinyin_split2, pinyin_utils, mark_to_number,
# object_array_add, the dictionary_index_* factories, hanzi_and_idc_regexp
# and non_hanzi_regexp are expected to be defined elsewhere in the project.
http = require "https"
fs = require "fs"
html_parser = require "node-html-parser"  # assumed; provides parse() with querySelectorAll support
csv_stringify = require "csv-stringify/sync"  # assumed; the sync api returns a string

# GET a url and resolve with the full response body as a string.
http_get = (url) ->
  new Promise (resolve, reject) ->
    request = http.get url, (response) ->
      data = []
      response.on "data", (a) -> data.push a
      response.on "end", -> resolve Buffer.concat(data).toString()
      response.on "error", reject
    # also reject on request-level errors (dns failure, refused connection)
    request.on "error", reject

# Scrape one character's Wiktionary page for a stroke count and an
# ideographic decomposition. Because of the final flat(), callers effectively
# read [char, strokes, decomposition] of the first matching paragraph.
get_wiktionary_data = (char) ->
  body = await http_get "https://en.wiktionary.org/wiki/#{char}"
  html = html_parser.parse body
  b = html.querySelectorAll "p"
  b = b.map (a) ->
    strokes = a.textContent.match /(\d+) stroke/
    strokes = strokes && parseInt(strokes[1], 10)
    decomposition = a.querySelector "a[title=\"w:Chinese character description languages\"]"
    if decomposition
      decomposition = decomposition.parentNode.parentNode.textContent
      decomposition = decomposition.match(/decomposition (.*)\)/)[1]
      decomposition = (decomposition.split(" or ")[0].match(hanzi_and_idc_regexp) || []).join("")
    [char, strokes, decomposition]
  b = b.filter (a) -> a[1] || a[2]
  b.flat()

# Sort the character table by stroke count, then by character.
sort_data = () ->
  chars = read_csv_file "data/characters-strokes-decomposition.csv"
  chars = chars.sort (a, b) -> a[1] - b[1] || a[0].localeCompare(b[0])
  write_csv_file "data/characters-strokes-decomposition-new.csv", chars

# Deduplicate extra components, keep only entries that have a stroke count,
# and write them sorted by stroke count.
update_extra_stroke_counts = () ->
  data = read_csv_file "data/extra-components.csv"
  data = data.sort (a, b) -> b.length - a.length
  data = delete_duplicates_stable_with_key data, 0
  data = data.filter (a) -> a.length > 1
  data = data.map (a) -> [a[0], parseInt(a[1], 10)]
  data = data.sort (a, b) -> a[1] - b[1]
  write_csv_file "data/extra-stroke-counts.csv", data

# Re-fetch stroke counts and decompositions from Wiktionary for a slice of
# the character table and log rows whose data differs from the file.
update_decompositions = (start_index, end_index) ->
  chars = read_csv_file "data/characters-strokes-decomposition.csv"
  chars = chars.filter (a) -> "1" != a[1]
  chars = chars.slice start_index, end_index
  batch_size = 10
  batches_count = Math.ceil chars.length / batch_size
  batches = []
  i = 0
  while i < batches_count
    batches.push chars.slice i * batch_size, (i + 1) * batch_size
    i += 1
  # await each batch before starting the next; the original forEach/then
  # version fired all batches concurrently, defeating the batching
  for batch in batches
    results = await Promise.all batch.map (a) -> get_wiktionary_data a[0]
    results.forEach (b, i) ->
      c = batch[i]
      if (b[1] && b[1] != parseInt(c[1], 10)) || (b[2] && b[2].length >= c[2].length && b[2] != c[2])
        c[1] = b[1]
        c[2] = b[2]
        console.log c.join " "

# Merge rows from new-data into the character table; new rows override
# existing rows with the same character.
add_new_data = () ->
  chars = read_csv_file "data/characters-strokes-decomposition.csv"
  new_data = read_csv_file("new-data").filter (a) -> a[0].length
  all = chars.concat new_data
  all_index = {}
  all.forEach (a) -> all_index[a[0]] = a
  all = Object.values(all_index).sort (a, b) -> a[1] - b[1] || a[0].localeCompare(b[0])
  write_csv_file "data/characters-strokes-decomposition-new.csv", all

# Log characters whose decomposition repeats a single component.
find_component_repetitions = () ->
  chars = read_csv_file "data/characters-strokes-decomposition.csv"
  chars.forEach (a) ->
    if a[2]
      b = a[2].replace non_hanzi_regexp, ""
      if 1 == delete_duplicates(split_chars(b)).length
        console.log a[0], b

# Group characters by pinyin (neutral-tone readings excluded) and write
# learning tables: all characters, plus the rare readings shared by fewer
# than three characters.
update_characters_by_pinyin_learning = ->
  by_pinyin = {}
  chars = get_all_characters_with_pinyin().filter (a) -> !a[1].endsWith("5")
  chars.forEach (a) -> object_array_add by_pinyin, a[1], a[0]
  rows = []
  for pinyin, chars_array of by_pinyin
    for character in chars_array
      rows.push [character, pinyin, chars_array.length]
  rows = rows.sort (a, b) -> (b[2] - a[2]) || a[1].localeCompare(b[1]) || a[0].localeCompare(b[0])
  rows = characters_add_learning_data rows
  write_csv_file "data/characters-by-pinyin-learning.csv", rows
  rows = (a for a in rows.reverse() when a[2] < 3)
  write_csv_file "data/characters-by-pinyin-learning-rare.csv", rows

# Map ascii characters to their fullwidth equivalents.
to_fullwidth = (str) ->
  str.replace /./g, (char) ->
    code = char.charCodeAt 0
    if char is " "
      "　"  # U+3000 ideographic space
    else if code >= 33 and code <= 126
      String.fromCharCode code - 33 + 65281  # ! through ~ -> U+FF01 onward
    else
      char

# Render [syllable, chars] pairs as vertical columns: syllables on top,
# characters below, padded with empty cells to equal height.
format_lines_vertically = (rows) ->
  columns = rows.map ([syllable, chars]) ->
    [split_chars(to_fullwidth(syllable)), split_chars(to_fullwidth(chars))]
  syllable_max_height = Math.max.apply null, columns.map (a) -> a[0].length
  chars_max_height = Math.max.apply null, columns.map (a) -> a[1].length
  delimiter = " "
  csv_lines = []
  for i in [0...syllable_max_height]
    row = columns.map (a) -> if a[0][i]? then a[0][i] else ""
    csv_lines.push row.join delimiter
  for i in [0...chars_max_height]
    row = columns.map (a) -> if a[1][i]? then a[1][i] else ""
    csv_lines.push row.join delimiter
  csv_lines

# Combine the per-level tsv files under data/hsk into data/hsk.csv as
# [word, numbered_pinyin] rows.
update_hsk = () ->
  files = fs.readdirSync "data/hsk"
  data = files.map (a) -> read_csv_file "data/hsk/#{a}", "\t"
  data = data.flat(1).map (a) ->
    pinyin = pinyin_split2(a[2]).map(pinyin_utils.markToNumber).join("").toLowerCase()
    [a[1], pinyin]
  write_csv_file "data/hsk.csv", data

# Add dictionary translations to the hsk list and sort it so that words
# whose rarest character is most frequent come first.
update_hsk_pinyin_translations = () ->
  dictionary_lookup = dictionary_index_word_pinyin_f 0, 1
  rows = read_csv_file("data/hsk.csv").map (a) ->
    translations = dictionary_lookup a[0], a[1]
    return a unless translations
    a.concat [translations[0][2]]
  frequency_index = get_character_frequency_index()
  rows = rows.sort (a, b) ->
    chars_a = split_chars a[0]
    chars_b = split_chars b[0]
    max_a = Math.max.apply Math, chars_a.map (char) -> frequency_index[char]
    max_b = Math.max.apply Math, chars_b.map (char) -> frequency_index[char]
    max_a - max_b
  write_csv_file "data/hsk-pinyin-translation.csv", rows

# The dsv_* tasks read and write csv data through file descriptor 0, so
# they operate on data piped to the process.
# Append up to five example words containing each character.
dsv_add_example_words = () ->
  dictionary = dictionary_index_word_pinyin_f 0, 1
  words = read_csv_file "data/words-by-frequency-with-pinyin-translation.csv"
  rows = read_csv_file(0).map (a) ->
    char_words = words.filter (b) -> b[0].includes a[0]
    unless char_words.length
      char_words = dictionary(a[0], a[1]) || []
    a.push char_words.slice(0, 5).map((b) -> b.join(" ")).join("\n")
    a
  write_csv_file 0, rows

# Append a translation looked up by word and pinyin, and print the rows
# space-delimited. The original passed an on_error callback to the streaming
# csv-stringify api, whose return value is not a string; the sync api used
# here is an assumed fix.
dsv_add_translations_with_pinyin = (word_index, pinyin_index) ->
  dictionary_lookup = dictionary_index_word_pinyin_f 0, 1
  rows = read_csv_file(0).map (a) ->
    translations = dictionary_lookup a[word_index], a[pinyin_index]
    return a unless translations
    a.concat [translations[0][2]]
  console.log csv_stringify.stringify(rows, {delimiter: " "}).trim()

# Append pinyin and translation looked up by word only.
dsv_add_translations = (word_index) ->
  dictionary_lookup = dictionary_index_word_f()
  rows = read_csv_file(0).map (a) ->
    translations = dictionary_lookup a[word_index]
    return a unless translations
    a.concat [translations[0][1], translations[0][2]]
  write_csv_file 0, rows

# Convert tone-mark pinyin in one column to tone-number pinyin.
dsv_mark_to_number = (pinyin_index) ->
  rows = read_csv_file(0).map (a) ->
    a[pinyin_index] = mark_to_number a[pinyin_index]
    a
  write_csv_file 0, rows

# Build the frequency-ordered word lists: take pinyin from the hsk data
# where available (defaulting to neutral tone), look up translations, and
# keep only words the dictionary knows.
update_frequency_pinyin = () ->
  dictionary_lookup = dictionary_index_word_pinyin_f 0, 1
  frequency = array_from_newline_file "data/words-by-frequency.csv"
  hsk = read_csv_file "data/hsk.csv"
  hsk_index = {}
  hsk.forEach (a) ->
    return if hsk_index[a[0]]
    pinyin = a[1]
    pinyin += "5" unless /[0-5]$/.test pinyin
    hsk_index[a[0]] = pinyin
  frequency_pinyin = frequency.map (a) -> [a, (hsk_index[a] || "")]
  rows = frequency_pinyin.map (a) ->
    translation = dictionary_lookup a[0], a[1]
    return [] unless translation
    [a[0], translation[0][1], translation[0][2]]
  rows = rows.filter (a) -> 3 is a.length
  write_csv_file "data/words-by-frequency-with-pinyin-translation.csv", rows
  rows = rows.map (a) -> [a[0], a[1]]
  write_csv_file "data/words-by-frequency-with-pinyin.csv", rows
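
# Usage sketch, an assumption since the source does not show how these tasks
# are invoked. The data dependency is visible above: update_hsk writes
# data/hsk.csv, which update_frequency_pinyin reads, so a full refresh would
# run them in this order:
#   update_hsk()
#   update_hsk_pinyin_translations()
#   update_frequency_pinyin()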