# A lexed token: its text (+str+), its +type+ symbol, and the column/line
# (+char_no+, +line_no+) at which it starts. +explicit_end+ and
# +surrounded_by_whitespace+ are extra slots that #split leaves unset (nil)
# on the tokens it creates.
class Token < Struct.new(:str, :type, :char_no, :line_no, :explicit_end, :surrounded_by_whitespace)
  # Breaks this token's text into smaller tokens.
  #
  # The text before any trailing commas is cut into pieces: every character
  # that is not 1-9 becomes its own piece (so letters are split one by one and
  # a "0" always stands alone), while a run of digits starting with 1-9 stays
  # together. Pieces containing a digit get type :int, all others :id. Each
  # piece's char_no is advanced by the size of the pieces before it; line_no
  # is copied from this token.
  #
  # Trailing commas are appended to the last piece when that piece contains an
  # ASCII letter; otherwise they become a separate :commas token. That also
  # covers a token consisting only of commas, which has no pieces to attach to.
  #
  # Returns an Array of new Token objects; this token is not modified.
  def split
    # /,*$/ always matches (possibly with an empty string), so the MatchData
    # is never nil for a String.
    commas_match = str.match(/,*$/)
    trailing_commas = commas_match[0]
    column = char_no

    pieces = commas_match.pre_match.scan(/[^1-9]|[0-9]+/).map do |text|
      type = text =~ /[0-9]/ ? :int : :id
      piece = Token.new(text, type, column, line_no)
      column += text.size
      piece
    end
    return pieces if trailing_commas.empty?

    last_piece = pieces.last
    # last_piece is nil when the token was nothing but commas.
    if last_piece && last_piece.str =~ /[a-zA-Z]/
      last_piece.str += trailing_commas
    else
      pieces << Token.new(trailing_commas, :commas, column, line_no)
    end
    pieces
  end
end
# Partitions +tokens+ into groups, cutting after every token whose +str+
# equals +str+ and dropping those separator tokens from the result.
#
# A leading separator or two adjacent separators produce an empty group; a
# separator at the very end does not add a trailing empty group. An empty
# input gives an empty Array.
#
# Returns an Array of Arrays of tokens.
def split_tokens(tokens, str)
  groups = tokens.slice_when { |previous, _following| previous.str == str }
  groups.map do |group|
    group.reject { |token| token.str == str }
  end
end
# Every punctuation character the lexer treats as a symbol, each one passed
# through Regexp.escape so the list can be joined into a character class.
AllSymbols = '@!?`~#%^&*-_=+[]|;<>.,()\'"{}$/\\:'.each_char.map { |ch| Regexp.escape(ch) }

# Token type => pattern recognising it. The entries are combined into LexRx
# in this order, so an earlier pattern wins when several could match at the
# same position.
TokenTypes = {
  comment: /# .*|\n#\n/, # also match \n#\n since many editors remove trailing spaces on save
  int: /0|[0-9]+/,
  char: /'(\\n|\\0|\\x[0-9a-fA-F][0-9a-fA-F]|.)?/m,
  str: /"(\\.|[^"])*"?/m,
  space: /[ \t]/,
  newline: /\r\n|\r|\n/,
  id: /[^#{AllSymbols.join} \t\n\r0-9][^#{AllSymbols.join} \t\n\r]*,*/,
  commas: /,+/,
  sym: /.,*/,
}
# Shared line-break pattern (the very same Regexp object as TokenTypes[:newline]).
NewlineRx = TokenTypes.fetch(:newline)

# One alternation with a named group per token type; whichever group is
# non-nil after a match identifies the token's type.
LexRx = /#{TokenTypes.map { |name, rx| "(?<#{name}>#{rx})" }.join("|")}/

# Scans +code+ with LexRx and returns the resulting Array of Token objects.
#
# Each token records its text, its type (the name of the LexRx group that
# matched), and the column/line at which it starts; columns start at 1 and
# lines start at +line_no+. surrounded_by_whitespace is set to true when the
# characters immediately before and after the match are both whitespace,
# with the start and end of +code+ counting as whitespace.
#
# :space, :newline and :comment matches still advance the position counters
# but are left out of the returned list.
def lex(code, line_no=1)
  skipped_types = [:space, :newline, :comment]
  tokens = []
  char_no = 1

  code.scan(LexRx) do
    found = Regexp.last_match
    text = found[0]
    type = found.names.find { |name| found[name] }.to_sym

    token = Token.new(text, type, char_no, line_no)
    neighbours = [found.pre_match[-1], found.post_match[0]]
    token.surrounded_by_whitespace = neighbours.all? { |ch| ch.nil? || ch.match?(/\s/) }

    last_break = text.rindex(NewlineRx)
    if last_break
      # The match spans at least one line break: bump the line counter and
      # restart the column from the text that follows the last break.
      line_no += text.scan(NewlineRx).size
      char_no = text.size - last_break
    else
      # FYI: a tab advances the column by 1, and multi-byte characters may be
      # counted by their byte length depending on the string's encoding, so
      # reported columns could be misleading.
      char_no += text.size
    end

    tokens << token unless skipped_types.include?(type)
  end

  tokens
end
