go-ethereum/core/vm/eof.go
Martin HS 56c4f2bfd4
core/vm, cmd/evm: implement eof validation (#30418)
The bulk of this PR is authored by @lightclient , in the original
EOF-work. More recently, the code has been picked up and reworked for the new EOF
specification, by @MariusVanDerWijden , in https://github.com/ethereum/go-ethereum/pull/29518, and also @shemnon has contributed with fixes.

This PR is an attempt to start eating the elephant one small bite at a
time, by selecting only the eof-validation as a standalone piece which
can be merged without interfering too much in the core stuff.

In this PR: 

- [x] Validation of eof containers, lifted from #29518, along with
test-vectors from consensus-tests and fuzzing, to ensure that the move
did not lose any functionality.
- [x] Definition of eof opcodes, which is a prerequisite for validation
- [x] Addition of `undefined` to a jumptable entry item. I'm not
super-happy with this, but for the moment it seems the least invasive
way to do it. A better way might be to go back and allowing nil-items or
nil execute-functions to denote "undefined".
- [x] benchmarks of eof validation speed 


---------

Co-authored-by: lightclient <lightclient@protonmail.com>
Co-authored-by: Marius van der Wijden <m.vanderwijden@live.de>
Co-authored-by: Danno Ferrin <danno.ferrin@shemnon.com>
2024-10-02 15:05:50 +02:00

502 lines
15 KiB
Go

// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package vm
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"strings"
"github.com/ethereum/go-ethereum/params"
)
const (
offsetVersion = 2
offsetTypesKind = 3
offsetCodeKind = 6
kindTypes = 1
kindCode = 2
kindContainer = 3
kindData = 4
eofFormatByte = 0xef
eof1Version = 1
maxInputItems = 127
maxOutputItems = 128
maxStackHeight = 1023
maxContainerSections = 256
)
var eofMagic = []byte{0xef, 0x00}
// HasEOFByte returns true if code starts with 0xEF byte
func HasEOFByte(code []byte) bool {
return len(code) != 0 && code[0] == eofFormatByte
}
// hasEOFMagic returns true if code starts with magic defined by EIP-3540
func hasEOFMagic(code []byte) bool {
return len(eofMagic) <= len(code) && bytes.Equal(eofMagic, code[0:len(eofMagic)])
}
// isEOFVersion1 returns true if the code's version byte equals eof1Version. It
// does not verify the EOF magic is valid.
func isEOFVersion1(code []byte) bool {
return 2 < len(code) && code[2] == byte(eof1Version)
}
// Container is an EOF container object.
type Container struct {
types []*functionMetadata
codeSections [][]byte
subContainers []*Container
subContainerCodes [][]byte
data []byte
dataSize int // might be more than len(data)
}
// functionMetadata is an EOF function signature.
type functionMetadata struct {
inputs uint8
outputs uint8
maxStackHeight uint16
}
// stackDelta returns the #outputs - #inputs
func (meta *functionMetadata) stackDelta() int {
return int(meta.outputs) - int(meta.inputs)
}
// checkInputs checks the current minimum stack (stackMin) against the required inputs
// of the metadata, and returns an error if the stack is too shallow.
func (meta *functionMetadata) checkInputs(stackMin int) error {
if int(meta.inputs) > stackMin {
return ErrStackUnderflow{stackLen: stackMin, required: int(meta.inputs)}
}
return nil
}
// checkStackMax checks the if current maximum stack combined with the
// functin max stack will result in a stack overflow, and if so returns an error.
func (meta *functionMetadata) checkStackMax(stackMax int) error {
newMaxStack := stackMax + int(meta.maxStackHeight) - int(meta.inputs)
if newMaxStack > int(params.StackLimit) {
return ErrStackOverflow{stackLen: newMaxStack, limit: int(params.StackLimit)}
}
return nil
}
// MarshalBinary encodes an EOF container into binary format.
func (c *Container) MarshalBinary() []byte {
// Build EOF prefix.
b := make([]byte, 2)
copy(b, eofMagic)
b = append(b, eof1Version)
// Write section headers.
b = append(b, kindTypes)
b = binary.BigEndian.AppendUint16(b, uint16(len(c.types)*4))
b = append(b, kindCode)
b = binary.BigEndian.AppendUint16(b, uint16(len(c.codeSections)))
for _, codeSection := range c.codeSections {
b = binary.BigEndian.AppendUint16(b, uint16(len(codeSection)))
}
var encodedContainer [][]byte
if len(c.subContainers) != 0 {
b = append(b, kindContainer)
b = binary.BigEndian.AppendUint16(b, uint16(len(c.subContainers)))
for _, section := range c.subContainers {
encoded := section.MarshalBinary()
b = binary.BigEndian.AppendUint16(b, uint16(len(encoded)))
encodedContainer = append(encodedContainer, encoded)
}
}
b = append(b, kindData)
b = binary.BigEndian.AppendUint16(b, uint16(c.dataSize))
b = append(b, 0) // terminator
// Write section contents.
for _, ty := range c.types {
b = append(b, []byte{ty.inputs, ty.outputs, byte(ty.maxStackHeight >> 8), byte(ty.maxStackHeight & 0x00ff)}...)
}
for _, code := range c.codeSections {
b = append(b, code...)
}
for _, section := range encodedContainer {
b = append(b, section...)
}
b = append(b, c.data...)
return b
}
// UnmarshalBinary decodes an EOF container.
func (c *Container) UnmarshalBinary(b []byte, isInitcode bool) error {
return c.unmarshalContainer(b, isInitcode, true)
}
// UnmarshalSubContainer decodes an EOF container that is inside another container.
func (c *Container) UnmarshalSubContainer(b []byte, isInitcode bool) error {
return c.unmarshalContainer(b, isInitcode, false)
}
func (c *Container) unmarshalContainer(b []byte, isInitcode bool, topLevel bool) error {
if !hasEOFMagic(b) {
return fmt.Errorf("%w: want %x", errInvalidMagic, eofMagic)
}
if len(b) < 14 {
return io.ErrUnexpectedEOF
}
if len(b) > params.MaxInitCodeSize {
return ErrMaxInitCodeSizeExceeded
}
if !isEOFVersion1(b) {
return fmt.Errorf("%w: have %d, want %d", errInvalidVersion, b[2], eof1Version)
}
var (
kind, typesSize, dataSize int
codeSizes []int
err error
)
// Parse type section header.
kind, typesSize, err = parseSection(b, offsetTypesKind)
if err != nil {
return err
}
if kind != kindTypes {
return fmt.Errorf("%w: found section kind %x instead", errMissingTypeHeader, kind)
}
if typesSize < 4 || typesSize%4 != 0 {
return fmt.Errorf("%w: type section size must be divisible by 4, have %d", errInvalidTypeSize, typesSize)
}
if typesSize/4 > 1024 {
return fmt.Errorf("%w: type section must not exceed 4*1024, have %d", errInvalidTypeSize, typesSize)
}
// Parse code section header.
kind, codeSizes, err = parseSectionList(b, offsetCodeKind)
if err != nil {
return err
}
if kind != kindCode {
return fmt.Errorf("%w: found section kind %x instead", errMissingCodeHeader, kind)
}
if len(codeSizes) != typesSize/4 {
return fmt.Errorf("%w: mismatch of code sections found and type signatures, types %d, code %d", errInvalidCodeSize, typesSize/4, len(codeSizes))
}
// Parse (optional) container section header.
var containerSizes []int
offset := offsetCodeKind + 2 + 2*len(codeSizes) + 1
if offset < len(b) && b[offset] == kindContainer {
kind, containerSizes, err = parseSectionList(b, offset)
if err != nil {
return err
}
if kind != kindContainer {
panic("somethings wrong")
}
if len(containerSizes) == 0 {
return fmt.Errorf("%w: total container count must not be zero", errInvalidContainerSectionSize)
}
offset = offset + 2 + 2*len(containerSizes) + 1
}
// Parse data section header.
kind, dataSize, err = parseSection(b, offset)
if err != nil {
return err
}
if kind != kindData {
return fmt.Errorf("%w: found section %x instead", errMissingDataHeader, kind)
}
c.dataSize = dataSize
// Check for terminator.
offsetTerminator := offset + 3
if len(b) < offsetTerminator {
return fmt.Errorf("%w: invalid offset terminator", io.ErrUnexpectedEOF)
}
if b[offsetTerminator] != 0 {
return fmt.Errorf("%w: have %x", errMissingTerminator, b[offsetTerminator])
}
// Verify overall container size.
expectedSize := offsetTerminator + typesSize + sum(codeSizes) + dataSize + 1
if len(containerSizes) != 0 {
expectedSize += sum(containerSizes)
}
if len(b) < expectedSize-dataSize {
return fmt.Errorf("%w: have %d, want %d", errInvalidContainerSize, len(b), expectedSize)
}
// Only check that the expected size is not exceed on non-initcode
if (!topLevel || !isInitcode) && len(b) > expectedSize {
return fmt.Errorf("%w: have %d, want %d", errInvalidContainerSize, len(b), expectedSize)
}
// Parse types section.
idx := offsetTerminator + 1
var types = make([]*functionMetadata, 0, typesSize/4)
for i := 0; i < typesSize/4; i++ {
sig := &functionMetadata{
inputs: b[idx+i*4],
outputs: b[idx+i*4+1],
maxStackHeight: binary.BigEndian.Uint16(b[idx+i*4+2:]),
}
if sig.inputs > maxInputItems {
return fmt.Errorf("%w for section %d: have %d", errTooManyInputs, i, sig.inputs)
}
if sig.outputs > maxOutputItems {
return fmt.Errorf("%w for section %d: have %d", errTooManyOutputs, i, sig.outputs)
}
if sig.maxStackHeight > maxStackHeight {
return fmt.Errorf("%w for section %d: have %d", errTooLargeMaxStackHeight, i, sig.maxStackHeight)
}
types = append(types, sig)
}
if types[0].inputs != 0 || types[0].outputs != 0x80 {
return fmt.Errorf("%w: have %d, %d", errInvalidSection0Type, types[0].inputs, types[0].outputs)
}
c.types = types
// Parse code sections.
idx += typesSize
codeSections := make([][]byte, len(codeSizes))
for i, size := range codeSizes {
if size == 0 {
return fmt.Errorf("%w for section %d: size must not be 0", errInvalidCodeSize, i)
}
codeSections[i] = b[idx : idx+size]
idx += size
}
c.codeSections = codeSections
// Parse the optional container sizes.
if len(containerSizes) != 0 {
if len(containerSizes) > maxContainerSections {
return fmt.Errorf("%w number of container section exceed: %v: have %v", errInvalidContainerSectionSize, maxContainerSections, len(containerSizes))
}
subContainerCodes := make([][]byte, 0, len(containerSizes))
subContainers := make([]*Container, 0, len(containerSizes))
for i, size := range containerSizes {
if size == 0 || idx+size > len(b) {
return fmt.Errorf("%w for section %d: size must not be 0", errInvalidContainerSectionSize, i)
}
subC := new(Container)
end := min(idx+size, len(b))
if err := subC.unmarshalContainer(b[idx:end], isInitcode, false); err != nil {
if topLevel {
return fmt.Errorf("%w in sub container %d", err, i)
}
return err
}
subContainers = append(subContainers, subC)
subContainerCodes = append(subContainerCodes, b[idx:end])
idx += size
}
c.subContainers = subContainers
c.subContainerCodes = subContainerCodes
}
//Parse data section.
end := len(b)
if !isInitcode {
end = min(idx+dataSize, len(b))
}
if topLevel && len(b) != idx+dataSize {
return errTruncatedTopLevelContainer
}
c.data = b[idx:end]
return nil
}
// ValidateCode validates each code section of the container against the EOF v1
// rule set.
func (c *Container) ValidateCode(jt *JumpTable, isInitCode bool) error {
refBy := notRefByEither
if isInitCode {
refBy = refByEOFCreate
}
return c.validateSubContainer(jt, refBy)
}
func (c *Container) validateSubContainer(jt *JumpTable, refBy int) error {
visited := make(map[int]struct{})
subContainerVisited := make(map[int]int)
toVisit := []int{0}
for len(toVisit) > 0 {
// TODO check if this can be used as a DOS
// Theres and edge case here where we mark something as visited that we visit before,
// This should not trigger a re-visit
// e.g. 0 -> 1, 2, 3
// 1 -> 2, 3
// should not mean 2 and 3 should be visited twice
var (
index = toVisit[0]
code = c.codeSections[index]
)
if _, ok := visited[index]; !ok {
res, err := validateCode(code, index, c, jt, refBy == refByEOFCreate)
if err != nil {
return err
}
visited[index] = struct{}{}
// Mark all sections that can be visited from here.
for idx := range res.visitedCode {
if _, ok := visited[idx]; !ok {
toVisit = append(toVisit, idx)
}
}
// Mark all subcontainer that can be visited from here.
for idx, reference := range res.visitedSubContainers {
// Make sure subcontainers are only ever referenced by either EOFCreate or ReturnContract
if ref, ok := subContainerVisited[idx]; ok && ref != reference {
return errors.New("section referenced by both EOFCreate and ReturnContract")
}
subContainerVisited[idx] = reference
}
if refBy == refByReturnContract && res.isInitCode {
return errIncompatibleContainerKind
}
if refBy == refByEOFCreate && res.isRuntime {
return errIncompatibleContainerKind
}
}
toVisit = toVisit[1:]
}
// Make sure every code section is visited at least once.
if len(visited) != len(c.codeSections) {
return errUnreachableCode
}
for idx, container := range c.subContainers {
reference, ok := subContainerVisited[idx]
if !ok {
return errOrphanedSubcontainer
}
if err := container.validateSubContainer(jt, reference); err != nil {
return err
}
}
return nil
}
// parseSection decodes a (kind, size) pair from an EOF header.
func parseSection(b []byte, idx int) (kind, size int, err error) {
if idx+3 >= len(b) {
return 0, 0, io.ErrUnexpectedEOF
}
kind = int(b[idx])
size = int(binary.BigEndian.Uint16(b[idx+1:]))
return kind, size, nil
}
// parseSectionList decodes a (kind, len, []codeSize) section list from an EOF
// header.
func parseSectionList(b []byte, idx int) (kind int, list []int, err error) {
if idx >= len(b) {
return 0, nil, io.ErrUnexpectedEOF
}
kind = int(b[idx])
list, err = parseList(b, idx+1)
if err != nil {
return 0, nil, err
}
return kind, list, nil
}
// parseList decodes a list of uint16..
func parseList(b []byte, idx int) ([]int, error) {
if len(b) < idx+2 {
return nil, io.ErrUnexpectedEOF
}
count := binary.BigEndian.Uint16(b[idx:])
if len(b) <= idx+2+int(count)*2 {
return nil, io.ErrUnexpectedEOF
}
list := make([]int, count)
for i := 0; i < int(count); i++ {
list[i] = int(binary.BigEndian.Uint16(b[idx+2+2*i:]))
}
return list, nil
}
// parseUint16 parses a 16 bit unsigned integer.
func parseUint16(b []byte) (int, error) {
if len(b) < 2 {
return 0, io.ErrUnexpectedEOF
}
return int(binary.BigEndian.Uint16(b)), nil
}
// parseInt16 parses a 16 bit signed integer.
func parseInt16(b []byte) int {
return int(int16(b[1]) | int16(b[0])<<8)
}
// sum computes the sum of a slice.
func sum(list []int) (s int) {
for _, n := range list {
s += n
}
return
}
func (c *Container) String() string {
var output = []string{
"Header",
fmt.Sprintf(" - EOFMagic: %02x", eofMagic),
fmt.Sprintf(" - EOFVersion: %02x", eof1Version),
fmt.Sprintf(" - KindType: %02x", kindTypes),
fmt.Sprintf(" - TypesSize: %04x", len(c.types)*4),
fmt.Sprintf(" - KindCode: %02x", kindCode),
fmt.Sprintf(" - KindData: %02x", kindData),
fmt.Sprintf(" - DataSize: %04x", len(c.data)),
fmt.Sprintf(" - Number of code sections: %d", len(c.codeSections)),
}
for i, code := range c.codeSections {
output = append(output, fmt.Sprintf(" - Code section %d length: %04x", i, len(code)))
}
output = append(output, fmt.Sprintf(" - Number of subcontainers: %d", len(c.subContainers)))
if len(c.subContainers) > 0 {
for i, section := range c.subContainers {
output = append(output, fmt.Sprintf(" - subcontainer %d length: %04x\n", i, len(section.MarshalBinary())))
}
}
output = append(output, "Body")
for i, typ := range c.types {
output = append(output, fmt.Sprintf(" - Type %v: %x", i,
[]byte{typ.inputs, typ.outputs, byte(typ.maxStackHeight >> 8), byte(typ.maxStackHeight & 0x00ff)}))
}
for i, code := range c.codeSections {
output = append(output, fmt.Sprintf(" - Code section %d: %#x", i, code))
}
for i, section := range c.subContainers {
output = append(output, fmt.Sprintf(" - Subcontainer %d: %x", i, section.MarshalBinary()))
}
output = append(output, fmt.Sprintf(" - Data: %#x", c.data))
return strings.Join(output, "\n")
}