# A lexed token: its text, its type and its source position, plus extra
# slots (described inline) that are left unset by the code in this class.
class Token < Struct.new(
  :str, :type, :char_no, :line_no,
  :surrounded_by_whitespace, # for split ids warning
  :sets_var, # for drawing AST
  :data_impl, # proc that returns data value for data tokens
  :matching_bracket,
  :duped_value, # for dup tokens it points to virtual corresponding token,
                 # for that virtual token, it points to AST node position
  :implicit_used)
  # Breaks this token's text into smaller tokens without modifying self.
  #
  # Trailing commas are set aside first. In the remaining text every character
  # other than 1-9 (so also "0") becomes its own piece, while a run of digits
  # that starts with 1-9 stays together. A piece containing a digit is typed
  # :int, any other piece :id. Each piece gets a char_no advanced by the length
  # of the pieces before it and shares this token's line_no.
  #
  # The trailing commas are appended to the last piece when that piece contains
  # an ASCII letter; otherwise they become a separate :commas token. This also
  # covers text made only of commas, which yields just the :commas token.
  #
  # Returns an Array of new Token objects (only str, type, char_no and line_no
  # are set on them).
  def split
    # /,*$/ always matches (at worst an empty match at the end of the text).
    comma_match = str.match(/,*$/)
    trailing_commas = comma_match[0]
    column = char_no
    pieces = comma_match.pre_match.scan(/[^1-9]|[0-9]+/).map { |text|
      piece = Token.new(text, text.match?(/[0-9]/) ? :int : :id, column, line_no)
      column += text.size
      piece
    }
    return pieces if trailing_commas.empty?

    last = pieces.last
    # `last` is nil when there was nothing in front of the commas.
    if last && last.str.match?(/[a-zA-Z]/)
      last.str += trailing_commas
    else
      pieces << Token.new(trailing_commas, :commas, column, line_no)
    end
    pieces
  end
end
# Every character treated as a symbol, each one regexp-escaped so the list can
# be joined and dropped inside a character class.
AllSymbols = '@!?`~#%^&*-_=+[]|;<>.,()\'"{}$/\\:'.each_char.map { |sym| Regexp.escape(sym) }

# Token type => pattern. The insertion order matters to lex, which walks this
# hash from the top and takes the first pattern matching at the current position.
TokenTypes = {
  # "#\n" is accepted as well, since many editors strip trailing spaces on save
  comment: /# .*|#\n/,
  int:     /0|[0-9]+/,
  char:    /'(\\n|\\0|\\x[0-9a-fA-F][0-9a-fA-F]|[\0-\x7F])?/m,
  str:     /"(\\.|[^"])*("b?)?/m,
  space:   /[ \t]/,
  newline: /\r\n|\r|\n/,
  id:      /[^#{AllSymbols.join} \t\n\r0-9][^#{AllSymbols.join} \t\n\r]*,*/,
  commas:  /,+/,
  sym:     /.,*/,
}

# Shortcut to the newline pattern (same Regexp object as TokenTypes[:newline]).
NewlineRx = TokenTypes.fetch(:newline)

# Converts source text into an array of Token objects.
#
# At each position the patterns in TokenTypes are tried in insertion order and
# the first one matching at the start of the remaining input is taken. Space,
# newline and comment matches advance the position counters but are not
# included in the result.
#
# code    - String of source text. The caller's string is not modified; the
#           remaining input is rebound to fresh strings as it is consumed.
# line_no - line number given to the first line of code (default 1).
#
# Returns an Array of Token with str, type, char_no, line_no and
# surrounded_by_whitespace set.
# Raises IogiiError (passing the previously lexed token) when the text of a
# token other than a str, char or comment cannot be read in
# Encoding.default_external.
def lex(code, line_no=1)
  tokens = []
  char_no = 1
  prev = ""

  # only convert to utf8 as needed since there could be invalid utf8 chars inside a ""b string which would break regexes/etc.
  until code.empty?
    TokenTypes.each{|type, regex|
      next unless code.start_with? regex
      found = Regexp.last_match
      match = found[0]
      code = found.post_match

      # A lone quote means the char pattern's optional body did not match;
      # take the next character as decoded in the default external encoding.
      if match == "'" && !code.empty? # utf8 char
        char = code.dup.force_encoding(Encoding.default_external).each_char.first.b
        match << char
        code[0,char.size] = ''
      end

      match.force_encoding(Encoding.default_external) unless type == :str && parse_str(match)[1] == '"b'
      begin
        match.codepoints
      rescue => e # e.g. invalid byte sequence in UTF-8
        if [:str,:char,:comment].include? type
          match = match.b
        else
          raise IogiiError.new(e.message + " after token", tokens[-1])
        end
      end
      token = Token.new(match, type, char_no, line_no)
      # Look at the actual remaining input (code), not the regexp post-match:
      # the two differ once the utf8 char branch above has consumed extra bytes.
      # A missing neighbour (start or end of input) counts as whitespace.
      token.surrounded_by_whitespace = [prev[-1], code[0]].all?{|s| (s||" ") =~ /\s/ }
      line_no += match.scan(NewlineRx).size
      if match[NewlineRx]
        # column restarts after the last newline contained in the match
        char_no = match.size-match.rindex(NewlineRx)
      else
        # FYI this counts tab as 1, could be misleading
        char_no += match.size
      end
      tokens << token unless [:space, :newline, :comment].include? type
      prev = match
      break
    }
  end

  tokens
end
