go-ethereum/core/asm/lexer.go
Felix Lange e9f78db79d
cmd/evm: fix some issues with the evm run command (#28109)
* cmd/evm: improve flags handling

This fixes some issues with flags in cmd/evm. The supported flags did not
actually show up in help output because they weren't categorized. I'm also
adding the VM-related flags to the run command here so they can be given
after the subcommand name. So it can be run like this now:

   ./evm run --code 6001 --debug

* cmd/evm: enable all forks by default in run command

The default genesis was just empty with no forks at all, which is annoying because
contracts will be relying on opcodes introduced in a fork. So this changes the default to
have all forks enabled.

* core/asm: fix some issues in the assembler

This fixes minor bugs in the old assembler:

- It is now possible to have comments on the same line as an instruction.
- Errors for invalid numbers in the jump instruction are reported better
- Line numbers in errors were off by one
2023-09-19 13:41:16 +02:00

276 lines
6.6 KiB
Go

// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package asm
import (
"fmt"
"os"
"strings"
"unicode"
"unicode/utf8"
)
// stateFn is one state of the lexer's state machine. A state function
// consumes input from the lexer it is given, may emit tokens, and returns
// the state to run next. Returning nil ends the loop driven by Lex.
type stateFn func(*lexer) stateFn
// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the tokens channel of the lexer.
type token struct {
typ tokenType // kind of token
lineno int // value of the lexer's line counter when the token was emitted
text string // input text between the lexer's start and pos offsets at emit time
}
// tokenType identifies the kind of a token produced by the lexer.
// The possible values are the constants declared below.
type tokenType int
//go:generate go run golang.org/x/tools/cmd/stringer -type tokenType
const (
eof tokenType = iota // emitted once by Lex after the state machine has stopped
lineStart // emitted at the very start of lexing and after every newline
lineEnd // emitted when a newline is read
invalidStatement // any invalid statement (not emitted by the lexer code in this file)
element // an identifier that is not followed by ':'
label // a label reference, written as '@' followed by the label name
labelDef // a label definition: an identifier followed by ':'
number // a run of decimal or hexadecimal digits
stringValue // a double-quoted string, including both quotes
)
// Character sets handed to accept and acceptRun. They are only used as sets
// of runes in this file, so the order of characters inside them is irrelevant.
const (
decimalNumbers = "1234567890" // the decimal digits
hexNumbers = decimalNumbers + "aAbBcCdDeEfF" // the hexadecimal digits, both cases
alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // the ASCII letters (letters only; digits live in decimalNumbers)
)
// lexer is the basic construct for parsing
// source code and turning it into tokens.
// Tokens are interpreted by the compiler.
//
// The text between start and pos is the pending token text; emit sends it
// as a token and ignore discards it.
type lexer struct {
input string // input contains the source code of the program
tokens chan token // tokens is used to deliver tokens to the listener
state stateFn // the current state function; nil once lexing has finished
lineno int // current line number in the source file, counted from 0
start, pos, width int // start of the pending text, current read offset, and byte width of the rune last read by next
debug bool // when set, emit also prints every token to stderr
}
// Lex tokenizes source and returns a receive-only channel on which the
// resulting tokens are delivered in order.
//
// The work happens in a separate goroutine: it emits an initial lineStart,
// runs the state machine (starting in lexLine) until a state returns nil,
// emits eof and finally closes the channel. The channel is unbuffered, so
// the goroutine only makes progress while the caller keeps receiving;
// callers should read until the channel is closed.
//
// When debug is true every emitted token is also printed to stderr.
func Lex(source []byte, debug bool) <-chan token {
	l := &lexer{
		input:  string(source),
		tokens: make(chan token),
		state:  lexLine,
		debug:  debug,
	}
	run := func() {
		l.emit(lineStart)
		for step := l.state; step != nil; step = l.state {
			l.state = step(l)
		}
		l.emit(eof)
		close(l.tokens)
	}
	go run()
	return l.tokens
}
// next decodes the rune at the current read position, advances past it and
// returns it. The byte width of the decoded rune is stored in l.width so that
// one following call to backup can undo the advance.
//
// At the end of the input next returns 0 and sets l.width to 0, which turns a
// following backup into a no-op.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return 0
	}
	r, width := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = width
	l.pos += width
	return r
}
// backup steps back over the rune read by the most recent call to next.
// Only the width of that one rune is remembered, so backup undoes a single
// next. After next has reported the end of the input the stored width is 0
// and backup leaves the position unchanged.
func (l *lexer) backup() {
	l.pos = l.pos - l.width
}
// peek reports the rune that the next call to next will return, without
// moving the read position. It is implemented as next followed by backup,
// so l.width is left holding the width of the peeked rune.
func (l *lexer) peek() rune {
	ahead := l.next()
	l.backup()
	return ahead
}
// ignore discards the pending text: it moves the start of the pending token
// up to the current read position, so everything consumed so far is left out
// of the next emitted token. The read position itself is not changed.
func (l *lexer) ignore() {
l.start = l.pos
}
// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did so. When the rune does not match (including at the
// end of the input) the read position is restored.
func (l *lexer) accept(valid string) bool {
	if !strings.ContainsRune(valid, l.next()) {
		l.backup()
		return false
	}
	return true
}
// acceptRun consumes runes for as long as they are members of valid. The
// first rune that is not in valid (or the end of the input) is left unread.
func (l *lexer) acceptRun(valid string) {
	// accept consumes one matching rune per call and un-reads the first
	// rune that does not match, which is exactly the stop condition here.
	for l.accept(valid) {
	}
}
// acceptRunUntil is the inverse of acceptRun: it consumes runes up to and
// including the first occurrence of until and reports whether that rune was
// found. If the input ends first it returns false with the read position at
// the end of the input.
//
// The end of the input is recognised by next reporting a zero width rather
// than by the rune value 0, so a literal NUL character in the input is
// skipped like any other rune instead of being mistaken for the end.
func (l *lexer) acceptRunUntil(until rune) bool {
	for {
		r := l.next()
		// next must be called even at the end of the input: it resets
		// l.width to 0 there, which callers that follow up with backup
		// rely on.
		if l.width == 0 {
			return false
		}
		if r == until {
			return true
		}
	}
}
// blob returns the pending text, i.e. the part of the input between the
// start of the current token and the current read position.
func (l *lexer) blob() string {
	pending := l.input[l.start:l.pos]
	return pending
}
// emit sends a token of type t, carrying the current line number and the
// pending text, on the tokens channel and then starts a new pending text at
// the current read position. The send blocks until the token is received.
// In debug mode the token is also printed to stderr before it is sent.
func (l *lexer) emit(t tokenType) {
	tok := token{typ: t, lineno: l.lineno, text: l.blob()}
	if l.debug {
		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", tok.lineno, tok.typ, tok.text)
	}
	l.tokens <- tok
	l.ignore()
}
// lexLine is the top-level state. It reads one rune at a time and either
// handles it directly or hands over to the state for the construct that
// the rune starts:
//
//   - a newline emits lineEnd, bumps the line counter and emits lineStart
//   - ";;" starts a comment
//   - other white space is skipped
//   - a letter or '_' starts an element or label definition
//   - a digit starts a number
//   - '@' starts a label reference (the '@' itself is dropped)
//   - '"' starts a string
//
// Any other rune, including the 0 that next returns at the end of the
// input and a ';' that is not followed by a second ';', ends lexing by
// returning nil.
func lexLine(l *lexer) stateFn {
	for {
		r := l.next()
		switch {
		case r == '\n':
			l.emit(lineEnd)
			l.ignore()
			l.lineno++
			l.emit(lineStart)
		case r == ';' && l.peek() == ';':
			return lexComment
		case isSpace(r):
			l.ignore()
		case r == '_' || isLetter(r):
			return lexElement
		case isNumber(r):
			return lexNumber
		case r == '@':
			l.ignore()
			return lexLabel
		case r == '"':
			return lexInsideString
		default:
			return nil
		}
	}
}
// lexComment skips the rest of the current line and discards it. The
// terminating newline is un-read so that lexLine sees it and emits the
// lineEnd token; if the input ends inside the comment there is nothing to
// un-read and the backup is a no-op.
func lexComment(l *lexer) stateFn {
	// The result is irrelevant: both "newline found" and "input ended"
	// are followed by the same steps.
	_ = l.acceptRunUntil('\n')
	l.backup()
	l.start = l.pos // drop the comment text
	return lexLine
}
// lexLabel lexes the name of a label reference. The leading '@' has already
// been consumed and dropped by lexLine. The name is the run of ASCII
// letters, digits and '_' that follows; it is emitted as a label token
// (with empty text when no such character follows) and lexing continues in
// lexLine.
func lexLabel(l *lexer) stateFn {
	const labelChars = alpha + "_" + decimalNumbers
	l.acceptRun(labelChars)
	l.emit(label)
	return lexLine
}
// lexInsideString lexes a string whose opening quote has already been
// consumed by lexLine. It reads up to and including the closing quote and
// emits a stringValue token whose text contains both quotes. If the input
// ends before a closing quote is found, no token is emitted. Newlines
// inside the string are consumed here and do not advance the line counter.
// In every case lexing continues in lexLine.
func lexInsideString(l *lexer) stateFn {
	closed := l.acceptRunUntil('"')
	if closed {
		l.emit(stringValue)
	}
	return lexLine
}
// lexNumber lexes a number whose first digit has already been consumed by
// lexLine. If that digit is directly followed by 'x' or 'X' the rest is read
// as hexadecimal digits, otherwise as decimal digits. The result is emitted
// as a number token and lexing continues in lexLine.
func lexNumber(l *lexer) stateFn {
	if l.accept("xX") {
		l.acceptRun(hexNumbers)
	} else {
		l.acceptRun(decimalNumbers)
	}
	l.emit(number)
	return lexLine
}
// lexElement lexes an identifier whose first character has already been
// consumed by lexLine. The identifier continues over ASCII letters, digits
// and '_'. If it is directly followed by ':' it is emitted as a labelDef
// and the ':' is consumed and dropped; otherwise it is emitted as an
// element. Lexing continues in lexLine.
func lexElement(l *lexer) stateFn {
	l.acceptRun(alpha + "_" + decimalNumbers)
	if l.peek() != ':' {
		l.emit(element)
		return lexLine
	}
	l.emit(labelDef)
	l.accept(":")
	l.ignore()
	return lexLine
}
// isLetter reports whether r is a letter as defined by Unicode. Note that
// this is wider than the ASCII-only alpha set used to continue identifiers.
func isLetter(r rune) bool {
	return unicode.IsLetter(r)
}
// isSpace reports whether r is white space as defined by Unicode. This
// includes '\n', but lexLine tests for the newline before calling isSpace.
func isSpace(r rune) bool {
	return unicode.IsSpace(r)
}
// isNumber reports whether r is in the Unicode number category. Note that
// this is wider than the ASCII digits in decimalNumbers.
func isNumber(r rune) bool {
	return unicode.IsNumber(r)
}