#
# Copyright (c) 2021, 2023 supercell
#
# SPDX-License-Identifier: BSD-3-Clause
#
require "uri"

require "../extern/dart_uri"
require "./assets/html_entities"

module Luce
  # Matches a run of one or more spaces, line feeds, carriage returns or tabs.
  # `normalize_link_label` uses it to collapse such runs into a single space.
  @@one_or_more_whitespace_pattern = Regex.new("[ \n\r\t]+")

  # Returns *html* with (`&`), (`<`), (`>`), (`"`) and (`'`) replaced by their
  # HTML character references.
  protected def self.escape_html(html : String) : String
    # A one-off "custom" mode: everything except the slash is escaped.
    mode = HtmlEscapeMode.new(escape_lt_gt: true, escape_quot: true, escape_apos: true)
    escape_html_impl(html, mode)
  end

  # Returns *text* escaped with `HtmlEscapeMode::ATTRIBUTE`, i.e. with (`&`),
  # (`"`), (`<`) and (`>`) replaced by their HTML character references.
  protected def self.escape_html_attribute(text : String) : String
    attribute_mode = HtmlEscapeMode::ATTRIBUTE
    escape_html_impl(text, attribute_mode)
  end

  # :nodoc:
  #
  # Selects which characters `escape_html_impl` replaces. (`&`) is always
  # replaced; every other character is opt-in through the flags below.
  struct HtmlEscapeMode
    # `ELEMENT` enables (`<`)/(`>`); `ATTRIBUTE` enables (`<`)/(`>`) and (`"`).
    ELEMENT   = HtmlEscapeMode.new("element", true)
    ATTRIBUTE = HtmlEscapeMode.new("attribute", true, true)

    # Human readable name of the mode, `"custom"` unless given.
    getter name : String
    # Whether (`<`) and (`>`) are escaped.
    getter? escape_lt_gt : Bool
    # Whether (`"`) is escaped.
    getter? escape_quot : Bool
    # Whether (`'`) is escaped.
    getter? escape_apos : Bool
    # Whether (`/`) is escaped.
    getter? escape_slash : Bool

    def initialize(@name = "custom", @escape_lt_gt = false, @escape_quot = false,
                   @escape_apos = false, @escape_slash = false)
    end

    # Appends the mode's `name` to *io*.
    #
    # Overriding the `IO` variant (rather than the argument-less `to_s`) keeps
    # `mode.to_s`, string interpolation and `io << mode` consistent: all of
    # them yield the name.
    def to_s(io : IO) : Nil
      io << @name
    end
  end

  # :nodoc:
  #
  # Returns a copy of *html* in which (`&`) is always replaced by `&amp;` and
  # (`"`), (`'`), (`<`), (`>`) and (`/`) are replaced by their character
  # references when the corresponding flag of *mode* is set. All other
  # characters are copied unchanged.
  protected def self.escape_html_impl(html : String, mode : HtmlEscapeMode) : String
    String.build do |builder|
      html.each_char do |char|
        case char
        when '&'
          builder << "&amp;"
        when '"'
          builder << (mode.escape_quot? ? "&quot;" : char)
        when '\''
          builder << (mode.escape_apos? ? "&#39;" : char)
        when '<'
          builder << (mode.escape_lt_gt? ? "&lt;" : char)
        when '>'
          builder << (mode.escape_lt_gt? ? "&gt;" : char)
        when '/'
          # A numeric character reference must end with `;`.
          builder << (mode.escape_slash? ? "&#47;" : char)
        else
          builder << char
        end
      end
    end
  end

  # Normalizes a link destination: percent-decodes it, decodes HTML character
  # references and finally percent-encodes the result again.
  protected def self.normalize_link_destination(destination : String) : String
    # See the description of these examples:
    # https://spec.commonmark.org/0.30/#example-501
    # https://spec.commonmark.org/0.30/#example-502

    # Decoding comes first because the destination might already be partly
    # encoded (see https://spec.commonmark.org/0.30/#example-502).
    # `foo%20b&auml;` therefore goes through these steps:
    # 1. foo b&auml;
    # 2. foo bä
    # 3. foo%20b%C3%A4
    percent_decoded = begin
      URI.decode(destination)
    rescue
      # Best effort: fall back to the destination exactly as it was given.
      destination
    end

    html_decoded = decode_html_characters(percent_decoded)
    DartURI.encode_full(html_decoded)
  end

  # Normalizes a link title: HTML character references are decoded first and
  # the result is then escaped for use inside an HTML attribute.
  protected def self.normalize_link_title(title : String) : String
    # See the description of these examples:
    # https://spec.commonmark.org/0.30/#example-505
    # https://spec.commonmark.org/0.30/#example-506
    # https://spec.commonmark.org/0.30/#example-507
    # https://spec.commonmark.org/0.30/#example-508
    decoded_title = decode_html_characters(title)
    escape_html_attribute(decoded_title)
  end

  # "Normalizes" a link label, according to the [CommonMark spec].
  #
  # [CommonMark spec]: https://spec.commonmark.org/0.30/#link-label
  protected def self.normalize_link_label(label : String) : String
    # 1. Drop leading and trailing whitespace.
    trimmed = label.strip
    # 2. Collapse every internal whitespace run into a single space.
    collapsed = trimmed.gsub(@@one_or_more_whitespace_pattern, " ")
    # 3. Case fold so that labels compare case-insensitively.
    collapsed.downcase(Unicode::CaseOptions::Fold)
  end

  # Decodes HTML entity and numeric character references in *input*, for
  # example `&#35;` becomes `#`.
  #
  # Every match of `Luce.html_characters_pattern` is replaced by the result of
  # `decode_html_characters_from_match`.
  protected def self.decode_html_characters(input : String) : String
    decoder = ->(match : Regex::MatchData) { Luce.decode_html_characters_from_match(match) }
    input.replace_all_mapped(Luce.html_characters_pattern, decoder)
  end

  # Decodes the HTML entity or numeric character reference captured by
  # *match*.
  #
  # Capture group 1 marks a named entity, group 2 holds the digits of a decimal
  # reference and group 3 the digits of a hexadecimal reference (*match* is
  # presumably produced by `Luce.html_characters_pattern`).
  #
  # Returns the decoded string. Unknown entity names are returned verbatim;
  # numeric references that do not denote a usable code point decode to
  # U+FFFD. If none of the three groups matched, the matched text is returned
  # unchanged.
  protected def self.decode_html_characters_from_match(match : Regex::MatchData) : String
    text = match[0]

    # Entity references, see
    # https://spec.commonmark.org/0.30/#entity-references.
    return (html_entities_map[text]? || text) unless match[1]?.nil?

    # Decimal numeric character references, see
    # https://spec.commonmark.org/0.30/#decimal-numeric-character-references.
    if decimal_number = match[2]?
      return numeric_reference_to_s(decimal_number.to_i32?)
    end

    # Hexadecimal numeric character references, see
    # https://spec.commonmark.org/0.30/#hexadecimal-numeric-character-references.
    if hexadecimal_number = match[3]?
      return numeric_reference_to_s(hexadecimal_number.to_i32?(16))
    end

    text
  end

  # :nodoc:
  #
  # Converts the numeric value of a character reference to a one-character
  # string. `nil` (digits that did not parse), zero, values above
  # `Char::MAX_CODEPOINT` and surrogates (U+D800..U+DFFF) all yield U+FFFD.
  # Surrogates are rejected up front because `Int#chr` raises for them.
  protected def self.numeric_reference_to_s(code_point : Int32?) : String
    if code_point && code_point > 0 && code_point <= Char::MAX_CODEPOINT &&
       !(0xD800 <= code_point <= 0xDFFF)
      code_point.chr.to_s
    else
      Char::REPLACEMENT.to_s
    end
  end

  # Removes the backslash in front of every backslash-escaped ASCII
  # punctuation character of *input*, e.g. `\*` becomes `*`.
  #
  # A backslash that is followed by any other character, or that ends the
  # string, is kept as is.
  protected def self.escape_punctuation(input : String) : String
    # Work on a character array: indexing a non-ASCII `String` is O(n), which
    # would make the loop below quadratic.
    chars = input.chars

    String.build do |buffer|
      i = 0
      while i < chars.size
        if chars[i].ord == Charcode::BACKSLASH
          following = chars[i + 1]?
          # Skip the backslash; the escaped character itself is emitted below.
          i += 1 if following && ascii_punctuation_characters.includes?(following)
        end

        buffer << chars[i]
        i += 1
      end
    end
  end
end

class Array(T) < Reference
  # Inserts all objects of *iterable* at position *index* in this array.
  #
  # This increases the length of the array by the length of *iterable*
  # and shifts all later objects towards the end of the array.
  #
  # The *index* value must be non-negative and no greater than `size`,
  # otherwise `IndexError` is raised. Passing the array itself as *iterable*
  # is supported. Returns `self`.
  #
  # ```
  # arr = [1, 2, 3, 7]
  #
  # arr.insert_all(4, [8, 9])
  # puts arr # => [1, 2, 3, 7, 8, 9]
  #
  # arr.insert_all(3, [4, 5, 6])
  # puts arr # => [1, 2, 3, 4, 5, 6, 7, 8, 9]
  # ```
  def insert_all(index : Int32, iterable : Iterable(T)) : self
    unless 0 <= index <= size
      raise IndexError.new
    end

    items = iterable.to_a
    # `to_a` may hand back the receiver itself (`arr.insert_all(i, arr)`).
    # Take a snapshot in that case, since the array is about to be resized and
    # its elements shifted.
    items = items.dup if items.same?(self)

    # Replacing the empty subrange at *index* with *items* is an insertion: it
    # grows the array once and shifts the tail in a single move.
    self[index, 0] = items

    self
  end
end

class String < Reference
  # Replace all substrings that match *pattern* by a computed string.
  #
  # Creates a new string in which the non-overlapping, non-empty substrings
  # that match *pattern* are replaced by the result of calling *replace* on the
  # corresponding `Regex::MatchData` object. Empty matches are left untouched
  # and *replace* is not called for them.
  #
  # *replace* is called once per match, from the first match to the last.
  # Returns `self` when nothing matches.
  #
  # ```
  # "a1b22".replace_all_mapped(/\d+/, ->(m : Regex::MatchData) { "<#{m[0]}>" })
  # # => "a<1>b<22>"
  # ```
  def replace_all_mapped(pattern : Regex, replace : Proc(Regex::MatchData, String)) : String
    # A single `gsub` pass builds the result once instead of copying the whole
    # string again for every match.
    gsub(pattern) do |matched, match_data|
      matched.empty? ? matched : replace.call(match_data)
    end
  end
end

class Regex < Reference
  # Matches this pattern against the string repeatedly.
  #
  # If *start* is provided, matching will start at that index.
  #
  # The returned array contains non-overlapping matches of the pattern in the
  # *string*, each reported exactly once.
  #
  # The matches are found by repeatedly finding the first match of the pattern
  # in the string, initially starting from *start*, and then from the end of
  # the previous match (but always at least one position later than the start
  # of the previous match, in case the pattern matches an empty substring).
  #
  # Raises `IndexError` if *start* is negative or greater than the size of
  # *string*.
  #
  # ```
  # exp = Regex.new(%q{(\w+)})
  # str = "Dash is a bird"
  # matches = exp.all_matches(str, 8)
  # matches.each do |m|
  #   match = m[0].not_nil!
  #   puts match
  # end
  #
  # # => a
  # # => bird
  # ```
  def all_matches(string : String, start : Int32 = 0) : Array(Regex::MatchData)
    raise IndexError.new("*start* is less than 0") if start < 0
    raise IndexError.new("*start* is greater than *string* size") if start > string.size

    matches = [] of Regex::MatchData
    offset = start

    while offset <= string.size
      found = match(string, offset)
      break if found.nil?
      matches << found

      # Continue after the match. An empty match does not consume anything, so
      # step one character past it; stepping from the previous search offset
      # instead would find the same empty match again.
      offset = found.end
      offset += 1 if found[0].empty?
    end

    matches
  end
end

struct Regex::MatchData
  # Returns the complete text matched by the pattern, i.e. capture group `0`.
  def match : String
    whole_match = self[0]
    whole_match
  end
end
