require_relative "./ops.rb"
require_relative "./lex.rb"

# Node of the parse tree assembled by build_tree. Nodes live in a flat array
# and refer to one another by index into that array.
#   token      - the Token the node was created for (nil for input nodes
#                synthesized without a token)
#   args       - indices of the nodes that are this node's arguments
#   type       - Symbol tag, e.g. :OpNode, :LoopOp, :DataNode, :ImplicitValue
#   impl       - meaning depends on type; in this file it holds the index of a
#                related node (a block's end bracket, the block opener of an
#                end bracket, or the node an implicit/arg value resolves to)
#   label      - optional description, e.g. "set NAME" on assignment nodes
#   is_fold_op - assigned on :LoopOp nodes; true when the op is foldr/foldr0
AST = Struct.new(:token, :args, :type, :impl, :label, :is_fold_op)

# Expands every :id token that names neither an op nor a register into the
# tokens returned by its own `split`. All other tokens pass through untouched.
# A warning is issued when such an id was surrounded by whitespace.
#
# tokens    - array of tokens
# registers - hash keyed by register/variable name
#
# Returns a new flattened array of tokens.
def split_ids(tokens, registers)
  expanded = tokens.map do |tok|
    recognized = tok.type != :id || lookup_op(tok.str) || registers[tok.str]
    next tok if recognized

    if tok.surrounded_by_whitespace
      warn("splitting %p into chars because no op/var by that name, but it is surrounded by whitespace" % tok.str, tok)
    end
    tok.split
  end
  expanded.flatten
end

# Strings that may not be used as the name following `set` / `let`.
Keywords = %w[set let > = del]

# Collects every name that acts as a variable in the token stream.
#
# Explicit names are the token strings that directly follow `set` or `let`.
# In addition, each `=` token receives an automatically chosen single capital
# letter, which is stored on the token via `sets_var=`. The letters are picked
# from the capital letters appearing in :id tokens: runs of consecutively used
# letters are located (index 0 is always treated as a run start), the run whose
# start is farthest from the next run start is selected, and the auto names are
# taken so that they end at the end of that run when the run is long enough.
#
# Raises IogiiError when `set`/`let` is not followed by an id, when the
# automatic names run past 'Z', or when an automatic name never appears in the
# program.
#
# Returns a hash mapping each variable name to true.
def get_var_ids_set(tokens)
  names = {}
  tokens.each_with_index do |tok, idx|
    next unless tok.str == "set" || tok.str == "let"

    follower = tokens[idx + 1]
    if !follower || Keywords.include?(follower.str)
      raise IogiiError.new("an id must follow `#{tok.str}`", tok)
    end
    names[follower.str] = true
  end

  assign_tokens = tokens.select { |tok| tok.str == "=" }
  nregisters = assign_tokens.size

  base = "A".ord
  used = Array.new(26, false)
  id_text = tokens.select { |tok| tok.type == :id }.map(&:str).join
  id_text.scan(/[A-Z]/) { |letter| used[letter.ord - base] = true }

  # Start index of each run of consecutively used letters (0 is always included).
  run_starts = [0] + (1...26).select { |i| used[i] && !used[i - 1] }
  # Distance from each run start to the next one (or to the end of the alphabet).
  spans = run_starts.each_with_index.map { |start, k| (run_starts[k + 1] || 26) - start }
  largest_diff_at = run_starts[spans.index(spans.max)]
  consec_used = used[largest_diff_at..-1].take_while { |flag| flag }.size

  first_code = base + largest_diff_at + [consec_used - nregisters, 0].max
  auto_names = Array.new(nregisters) { |x| (x + first_code).chr }

  auto_names.each { |auto| names[auto] = true }
  assign_tokens.each_with_index do |tok, k|
    name = auto_names[k]
    tok.sets_var = name
    raise IogiiError.new("Out of letters for auto vars", tok) if name > "Z"
    raise IogiiError.new("Sets register '#{name}' but it is never used", tok) unless used[name.ord - base]
  end

  names
end

# True when the token is a literal value: an int, a char or a string.
def is_datum(token)
  case token.type
  when :int, :char, :str then true
  else false
  end
end

# Parses one data literal (a single datum or a comma separated list of data)
# that begins at tokens[token_ind - 1].
#
# Tokens are consumed while they alternate between data and :commas
# separators; two adjacent data end the literal. An int directly followed by
# a token whose string starts with "~" and ends with "," is read as a negated
# int followed by a comma separator (the "~" is moved onto the int as "-").
# Copies of those two tokens are edited so the caller's tokens stay intact.
#
# tokens    - full token array
# token_ind - index one past the first token of the literal
#
# Returns [op, next_index] where op is built by new_op("data", ...) and yields
# the parsed value, and next_index is the index of the first unconsumed token.
def parse_data(tokens, token_ind)
  i = token_ind - 1
  data_tokens = []
  prev = false # whether the previously consumed token was a datum
  while i < tokens.size
    if is_datum(tokens[i])
      break if prev
      if tokens.size > i+1 && tokens[i].type == :int && tokens[i+1].str[0] == "~" && tokens[i+1].str[-1] == ","
        data_tokens << tokens[i].dup
        data_tokens << tokens[i+1].dup
        data_tokens[-2].str = "-"+data_tokens[-2].str
        data_tokens[-1].str = data_tokens[-1].str[1..-1]
        # The stripped token is now a pure comma separator. This used to be a
        # no-op comparison (`==`); an assignment was intended.
        # NOTE(review): assumes Token exposes a `type=` writer — confirm.
        data_tokens[-1].type = :commas
        i += 1
        prev = false
      else
        prev = true
        data_tokens << tokens[i]
      end
    else
      break if tokens[i].type != :commas
      prev = false
      data_tokens << tokens[i]
    end
    i += 1
  end
  tokens = data_tokens

  # Nesting depth is the length of the longest comma separator.
  depth = tokens.reject{|t|is_datum(t)}.map{|t|t.str.size}.max||0
  # All data must share one type; a mix is treated as strings.
  token_type = nil
  tokens.select{|t|is_datum(t)}.each{|t|
    token_type ||= t.type
    token_type = :str if token_type != t.type
  }

  value = parse_list(tokens,depth,token_type)
  case token_type
  when :str
    type = CharType
    depth += 1 # a string contributes one extra level of rank
  when :char
    type = CharType
  when :int
    type = IntType
  else; raise
  end
  [new_op("data", type_and_rank_to_str(type,depth)){ value }, i]
end

# Splits tokens into groups, ending a group after every token whose string
# equals str; the separator tokens themselves are dropped from the groups.
# A trailing separator does not produce a trailing empty group.
def split_tokens(tokens, str)
  is_separator = ->(tok) { tok.str == str }
  tokens.slice_after(&is_separator).map do |group|
    group.reject(&is_separator)
  end
end

# Recursively converts data tokens into a value nested depth levels deep.
# At each level the tokens are split on a separator made of depth commas and
# every group is parsed one level shallower. At depth 0 the first token is
# parsed as a single datum of to_type.
#
# Raises IogiiError when a depth-0 group contains no token.
def parse_list(tokens,depth,to_type)
  unless depth == 0
    separator = "," * depth
    children = split_tokens(tokens, separator).map do |group|
      parse_list(group, depth - 1, to_type)
    end
    return to_lazy_list(children)
  end

  raise IogiiError.new 'found ", ," invalid data format' if tokens.empty?
  parse_datum(tokens[0], to_type)
end

# Converts a single literal token into its runtime value.
#
# :int  - the integer, or its string form (via str) when to_type is :str
# :char - the character's ordinal, or a one element list when to_type is :str
# :str  - a lazy list built from the text between the quotes; a closing quote
#         is optional, and a string ending in `"b` is taken as is (raw byte
#         mode) instead of being re-tagged with the default external encoding
#
# Any other token type yields nil.
# Raises IogiiError for a char token that has no character.
def parse_datum(token,to_type)
  case token.type
  when :int
    number = token.str.to_i
    return number unless to_type == :str
    str(number.const)
  when :char
    raise IogiiError.new "empty char", token if token.str.size < 2
    code = parse_char(token.str[1..-1]).ord
    if to_type == :str
      [code.const, Null]
    else
      code
    end
  when :str
    raw = token.str
    if raw.size > 2 && raw[-2..-1] == '"b'
      meat = raw[1..-3]
      only_ascii = meat.bytes.all? { |byte| byte < 128 }
      warn("You have used the raw byte mode on a string that does not contain any binary characters, if you wish to reverse the string instead then add a space before the `b`", token) if only_ascii
    else
      has_closing_quote = raw.size > 1 && raw[-1] == '"'
      last_index = has_closing_quote ? -2 : -1
      meat = raw[1..last_index].force_encoding(Encoding.default_external)
    end
    str_to_lazy_list(parse_str(meat))
  end
end

# Replaces each run of literal tokens with a single :data token.
#
# Tokens that are not data, or whose string is a register name, are copied
# through unchanged. For a datum, parse_data consumes the whole literal; the
# resulting :data token carries the concatenated source text, the position of
# the literal's first token, and the parsed op in `data_impl`.
#
# Returns a new array of tokens.
def tokenize_data(tokens, registers)
  result = []
  pos = 0
  while pos < tokens.size
    current = tokens[pos]
    pos += 1
    if registers[current.str] || !is_datum(current)
      result << current
      next
    end

    first = pos - 1
    data_impl, pos = *parse_data(tokens, pos)
    text = tokens[first...pos].map(&:str).join
    merged = Token.new(text, :data, current.char_no, current.line_no)
    merged.data_impl = data_impl
    result << merged
  end
  result
end

# True when child is parent itself or is reachable from parent by following
# `args` links. parent and child are indices into nodes.
# TODO: this operation is more expensive than it needs to be.
def is_ancestor(parent, child, nodes)
  parent == child || nodes[parent].args.any? { |arg| is_ancestor(arg, child, nodes) }
end

# Finds a chain of node indices linking child to parent through `args`.
# The first argument (in order) that leads to child is used.
#
# Returns the indices ordered from child up to parent, or nil when child is
# not reachable from parent.
def route_to_ancestor(parent, child, nodes)
  if parent == child
    [parent]
  else
    path = nil
    nodes[parent].args.find { |arg| path = route_to_ancestor(arg, child, nodes) }
    path && path.push(parent)
  end
end

# Converts the token stream into a flat array of AST nodes by simulating a
# stack of values: each token pops the node indices it consumes as arguments
# and pushes the index of the node it creates.
#
# tokens     - array of tokens; MUTATED: virtual "dupped" tokens are inserted
#              after dup/peek style ops the first time they are seen
# registers  - hash keyed by variable name; entries are overwritten with the
#              index of the node a `set`/`let`/`=` assigns
# null_input - object whose `value` is consulted for `$` outside of any block
#
# Returns [nodes, stack, deleted, nimplicits, all_lbs]:
#   nodes      - every AST node created
#   stack      - indices of the nodes left on the value stack
#   deleted    - indices of nodes removed by `let` / `del`
#   nimplicits - how many implicit values were created by popping an empty stack
#   all_lbs    - indices of block ops that were never closed
#
# Raises IogiiError for stray commas and unknown ops.
def build_tree(tokens, registers, null_input)
  # It is important for each token to be responsible for adding only 1 value to the stack; that way rotating the main program can choose a break point anywhere. That is why adding multiple values is accomplished by creating virtual tokens that are responsible for the other values.

  stack = []      # indices (into nodes) of the values currently available
  lbs = []        # open :MDupOp blocks, innermost last
  loop_lbs = []   # open :LoopOp blocks without an explicitly matched bracket
  all_lbs = []    # every open block, innermost last
  nodes = []
  deleted = []
  token_ind = 0   # index of the NEXT token to read
  nimplicits = 0
  t = blocks_can_close_now = nil

  # Appends a node and puts its index on the stack; returns the node.
  push = -> a { stack << nodes.size; nodes << a; a }
  # Removes and returns the top index, creating an implicit value first when
  # the stack is empty. An implicit value on top is bound to the innermost
  # open block (nil when there is none).
  pop = -> {
    if stack.empty?
      nimplicits += 1
      push[AST.new(t, [], :ImplicitValue)]
    end
    nodes[stack.last].impl = all_lbs[-1] if nodes[stack.last].type == :ImplicitValue
    stack.pop
  }
  # Returns the top index without removing it (may create an implicit value).
  peek = -> { ans = pop[]; stack << ans; ans }
  # add a token for this so that it can be wrapped
  # For each entry of pos a virtual token is inserted at the next read
  # position, so a later entry ends up before an earlier one. Every virtual
  # token is linked to the node being duplicated.
  insert_dupped = -> dup_ind,pos {
    t_dup = nodes[dup_ind].token
    if !t_dup.dupped_value # (first pass doesn't have dupped tokens yet)
      t_dup.dupped_value = pos.map{|i|
        tokens[token_ind,0] = Token.new(i == 0 ? "dupped_value" : "peek_placeholder", :"VirtualDupped#{i}")
      }
    end
    t_dup.dupped_value.each{|v|v.dupped_value = nodes[dup_ind] }
    # the virtual tokens still have to be read before any block may close
    blocks_can_close_now = false
  }

  loop {
    t = tokens[token_ind]
    token_ind += 1
    blocks_can_close_now = true
    if token_ind > tokens.size
      break
    elsif registers[t.str]
      push[AST.new(t, [], :VarNode)]
    elsif t.str && t.str[0] == ">"
      if t.matching_bracket
        # explicit end bracket already paired with a block opener: close that block
        lb = nodes.size.times.find{|node| nodes[node].token.equal?(t.matching_bracket) }
        all_lbs.delete(lb)
        push[AST.new(t, [pop[]], :MatchedEndBracket, lb)]
        nodes[lb].impl = nodes.size-1
      else
        push[AST.new(t, [pop[]], :UnmatchedEndBracket)]
      end
    elsif t.str == "set" || t.str == "let"
      # the following token is the variable name; it is consumed here
      name = tokens[token_ind].str
      token_ind += 1
      registers[name] = peek[]
      push[AST.new(t, [pop[]], :IdentityNode, nil, "set #{name}")]
      # `let` does not leave the value on the stack
      deleted << pop[] if t.str == "let"
    elsif t.str == "del"
      deleted << pop[]
    elsif t.str == "="
      registers[t.sets_var] = peek[]
      push[AST.new(t, [pop[]], :IdentityNode, nil, "set #{t.sets_var}")]
    elsif t.type == :data
      push[AST.new(t, [], :DataNode)]
    elsif t.type == :commas
      raise IogiiError.new "commas must follow data or op that can vectorize", t
    elsif t.type == :VirtualDupped0 || t.type == :VirtualDupped1
      push[AST.new(t, [], t.type)]
    elsif t.type == :id || t.type == :sym
      op = lookup_op(t.str)
      raise IogiiError.new "unknown op", t if !op
      # arguments are popped last-first, so reverse to restore source order
      args = op.parse_nargs.times.map{ pop[] }.reverse
      if op.name == "dup"
        push[AST.new(t, args, :IdentityNode)]
        insert_dupped[nodes.size-1,[0]]
      elsif op.name == "peek"
        push[AST.new(t, args, :IdentityNode)]
        insert_dupped[nodes.size-1,[0,1]]
      elsif op.name == "$"
        if all_lbs.empty?
          # outside of any block `$` refers to the program input
          if null_input.value
            push[AST.new(t, [], :ImplicitValue)]
          else
            # the input node is stored in nodes only (not on the stack) and referenced through impl
            nodes << mk_input_node
            push[AST.new(t, [], :ArgNode, nodes.size - 1)]
          end
        else
          # inside a block `$` refers to the innermost open block
          push[AST.new(t, [], :ArgNode, all_lbs[-1])]
        end
      elsif op.name == "input"
        push[mk_input_node(t)]
      elsif op.name == "implicit"
        push[AST.new(t, [], :ImplicitValue)]
      elsif op.block
        if ['mdup', 'mpeek'].include?(op.name)
          push[AST.new(t, args, :MDupOp)]
          lbs << nodes.size - 1
        else
          n = AST.new(t, args, :LoopOp)
          n.is_fold_op = ["foldr","foldr0"].include? op.name
          push[n]
          # loops with an explicit matching bracket are closed by that bracket, not implicitly
          loop_lbs << nodes.size - 1 if !t.matching_bracket
        end
        all_lbs << nodes.size - 1
        # a block cannot close on the very token that opened it
        blocks_can_close_now = false
      else
        push[AST.new(t, args, :OpNode)]
      end
    else
      raise "unknown token type %p" % t.type.to_s
    end

    # implicitly close meta ops
    # (as soon as the value on top of the stack depends on the block opener)
    while blocks_can_close_now && !lbs.empty? && is_ancestor(peek[], lbs.last, nodes)
      lb = nodes[lb_ind = lbs.pop]
      all_lbs.delete(lb_ind)
      push[AST.new(t, [pop[]], :MatchedEndBracket, lb_ind)]
      lb.impl = nodes.size-1
      insert_dupped[lb_ind,[0]] if ["mdup",";"].include?(lb.token.str)
      insert_dupped[lb_ind,[0,1]] if ["mpeek","!"].include?(lb.token.str)
    end

    # implicitly close loops
    while blocks_can_close_now && !loop_lbs.empty? && is_ancestor(peek[], loop_lbs.last, nodes)
      # a fold stays open until some node on the route from the top value to the fold takes more than one argument
      break if nodes[loop_lbs.last].is_fold_op && !route_to_ancestor(peek[], loop_lbs.last, nodes).any?{|n| nodes[n].args.size > 1 }
      lb = nodes[lb_ind = loop_lbs.pop]
      all_lbs.delete(lb_ind)
      push[AST.new(t, [pop[]], :MatchedEndBracket, lb_ind)]
      lb.impl = nodes.size-1
    end
  }

  [nodes, stack, deleted, nimplicits, all_lbs]
end

# Creates an :InputNode for the given token (nil when the node has no source
# token) and sets the global $input_checked flag to true.
def mk_input_node(token = nil)
  $input_checked = true
  input_node = AST.new(token, [], :InputNode)
  input_node
end

# Walks the tree below node and pairs unmatched end brackets with the loops
# they close.
#
# rbs holds the indices of pending :UnmatchedEndBracket nodes and rb_counts the
# number of commas in each bracket's token, i.e. how many more loops that
# bracket still skips. On the way back up, every :LoopOp lowers the leading
# positive counts by one and claims the first pending bracket whose count is
# not positive: the bracket becomes a :MatchedEndBracket and the two tokens are
# linked through `matching_bracket`. A bracket that is still pending once its
# own subtree is finished is dropped.
#
# nodes     - array of AST nodes
# node      - index of the subtree root
# rbs       - array of pending bracket indices (modified in place)
# rb_counts - array parallel to rbs (modified in place)
def match_brackets(nodes, node, rbs, rb_counts)
  current = nodes[node]
  if current.type == :UnmatchedEndBracket
    rbs.push(node)
    rb_counts.push(current.token.str.count(","))
  end

  current.args.each do |arg|
    match_brackets(nodes, arg, rbs, rb_counts)
  end

  if current.type == :LoopOp
    slot = 0
    until slot >= rbs.size || rb_counts[slot] <= 0
      rb_counts[slot] -= 1
      slot += 1
    end

    if slot < rbs.size
      bracket = nodes[rbs.delete_at(slot)]
      rb_counts.delete_at(slot)
      bracket.type = :MatchedEndBracket
      current.token.matching_bracket = bracket.token
      bracket.token.matching_bracket = current.token
    end
  end

  if rbs[-1] == node # unmatched
    rbs.pop
    rb_counts.pop
  end
end

# Parses the main program's tokens into a tree of AST nodes.
#
# Steps, in order:
#   1. collect variable names, split unknown ids, merge data literals
#   2. build the tree once; if values were popped from an empty stack, rotate
#      the token list around an inserted input/implicit token and rebuild
#   3. pair unmatched end brackets with loops (recorded on the tokens)
#   4. rebuild so the recorded bracket pairs take effect
#   5. resolve every implicit value, either to the value given by a single
#      remaining unmatched end bracket or to a fresh input node
#
# tokens   - array of tokens for the program
# raw_mode - when truthy the input is never considered empty here
#
# Returns [nodes, stack, registers] where stack holds the indices of the
# program's resulting values.
# Raises IogiiError for an unterminated block, more than one unmatched end
# bracket, or an implicit override that is never used.
def parse_main(tokens, raw_mode)
  registers = get_var_ids_set(tokens)
  tokens = split_ids(tokens, registers)
  tokens = tokenize_data(tokens, registers)
  # NOTE(review): `promise` and `$ReadStdinLines.empty` are defined elsewhere; presumably this is evaluated only when `.value` is requested — confirm.
  null_input = promise{ $input_checked = true; !raw_mode && $ReadStdinLines.empty }
  nodes, stack, deleted, nimplicits, lbs = build_tree(tokens, registers, null_input)
  if nimplicits > 0
    # values that still-open dup/peek style blocks would have supplied
    unfinished_extra = lbs.map{|lb|
      case nodes[lb].token.str
        when ";", "mdup"; 1
        when "!", "mpeek"; 2
        else; 0
      end
    }.sum
    # we don't need as many implicits since rotating will add extra dupped values not yet present
    nimplicits = [nimplicits - unfinished_extra, 1].max
    nrot = [nimplicits - 1, stack.size - 1].min

    # find the token position at which the program is cut for rotation
    if stack.empty?
      last_token_ind = tokens.size
    else
      last_token = nodes[stack[~nrot]].token
      last_token_ind = tokens.rindex{|t|t.equal? last_token}
      # `let`/`set` also own the name token that follows them
      last_token_ind += last_token.str == "let" || last_token.str == "set" ? 2 : 1
    end

    input = Token.new(null_input.value ? "implicit" : "input", :id)

    # tail of the program, then the input token, then the head of the program
    tokens = tokens[last_token_ind..-1] + [input] + tokens[0,last_token_ind]

#     puts tokens.map(&:str)*" "

    # build the tree after literally rotating the tokens, rather than manipulating the tree, because the program rotation could have interrupted functions which would not have been able to implicitly close
    nodes, stack, deleted, nimplicits, lbs = build_tree(tokens, registers, null_input)
  end

  # pair explicit end brackets with loops; the pairing is stored on the tokens
  (stack + deleted).each{|si| match_brackets(nodes, si, [], []) }

  unmatched = nodes.filter{|n| n.type == :UnmatchedEndBracket }
  raise IogiiError.new("implicit value set twice", unmatched[1]) if unmatched.size > 1
  implicit_override = unmatched[0].args[0] if !unmatched.empty?

  # rebuild: build_tree reads `matching_bracket` from the tokens, so the pairs found above now close their loops
  nodes, stack, deleted, nimplicits, lbs = build_tree(tokens, registers, null_input)
  raise IogiiError.new("unterminated function", nodes[lbs[-1]]) if !lbs.empty?

  # recompute against the rebuilt tree
  unmatched = nodes.filter{|n| n.type == :UnmatchedEndBracket }
  raise IogiiError.new("implicit value set twice", unmatched[1]) if unmatched.size > 1
  implicit_override = unmatched[0].args[0] if !unmatched.empty?

  # set implicits
  implicit_nodes = nodes.filter{|n| n.type == :ImplicitValue && n.args == [] }
  if implicit_override
    # a remaining unmatched end bracket supplies the value of every implicit
    raise IogiiError.new("implicit value set but not used") if implicit_nodes.empty?
    implicit_nodes.each{|n| n.impl = implicit_override }
  else
    # implicits not bound to a block read from a new input node
    implicit_nodes.each{|n|
      if n.impl == nil
        nodes << mk_input_node
        n.impl = nodes.size - 1
      end
    }
  end

  [nodes, main_inds=stack, registers]
end
