feat: add length to errors, performance improvements

danielgtaylor · Nov 20, 2021 · 378f5a7 · 378f5a7
1 parent 9d4e34d
commit 378f5a7
Show file tree

Hide file tree

Showing 8 changed files with 400 additions and 139 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,8 @@ This library was originally built for use in templating languages (e.g. for-loop
 Features:
 
 - Fast, low-allocation parser and runtime
-- Type checking
+  - Many simple expressions are zero-allocation
+- Type checking during parsing
 - Simple
   - Easy to learn
   - Easy to read
@@ -24,24 +25,24 @@ Features:
 
 ## Usage
 
-Try it out on the [Go Playground](https://play.golang.org/p/Z0UcEBgfxu_r)!
+Try it out on the [Go Playground](https://play.golang.org/p/Z0UcEBgfxu_r)! You can find many [example expressions in the tests](https://github.com/danielgtaylor/mexpr/blob/main/interpreter_test.go#L18).
 
 ```go
 import "github.com/danielgtaylor/mexpr"
 
 // Convenience for lexing/parsing/running in one step:
-result, err := mexpr.Eval("a + b", map[string]interface{}{
-	"a": 1,
-	"b": 2,
+result, err := mexpr.Eval("a > b", map[string]interface{}{
+	"a": 2,
+	"b": 1,
 })
 
 // Manual method with type checking and fast AST re-use. Error handling is
 // omitted for brevity.
-l := mexpr.NewLexer("a + b")
+l := mexpr.NewLexer("a > b")
 p := mexpr.NewParser(l)
 ast, err := mexpr.Parse()
 typeExamples = map[string]interface{}{
-	"a": 1,
+	"a": 2,
 	"b": 1,
 }
 err := mexpr.TypeCheck(ast, typeExamples)
@@ -71,7 +72,7 @@ if err != nil {
 ### Literals
 
 - **strings** double quoted e.g. `"hello"`
-- **numbers** e.g. `123`, `2.5`
+- **numbers** e.g. `123`, `2.5`, `1_000_000`
 
 Internally all numbers are treated as `float64`, which means fewer conversions/casts when taking arbitrary JSON/YAML inputs.
 
@@ -158,20 +159,59 @@ Indexes are zero-based. Slice indexes are optional and are _inclusive_. `foo[1:2
 
 ## Performance
 
-Performance compares favorably to [antonmedv/expr](https://github.com/antonmedv/expr) for both `Eval(...)` and cached program performance, which is expected given the more limited feature set. The example expression used is non-trivial: `foo.bar / 2 * (2 + 4 / 2) == 20 and "v" in baz`.
+Performance compares favorably to [antonmedv/expr](https://github.com/antonmedv/expr) for both `Eval(...)` and cached program performance, which is expected given the more limited feature set. The `slow` benchmarks include lexing/parsing/interpreting while the `cached` ones are just the interpreting step. The `complex` example expression used is non-trivial: `foo.bar / (1 * 1024 * 1024) >= 1.0 and "v" in baz and baz.length > 3 and arr[2:].length == 1`.
 
 ```
-$ go test -bench=. -benchtime=5s
 goos: darwin
 goarch: amd64
 pkg: github.com/danielgtaylor/mexpr
 cpu: Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz
-BenchmarkMexpr-12             378998      2988 ns/op    1360 B/op    37 allocs/op
-BenchmarkMexprCached-12      9591484     120.9 ns/op      16 B/op     2 allocs/op
-BenchmarkLibExpr-12           621049      9300 ns/op    7474 B/op    75 allocs/op
-BenchmarkLibExprCached-12   14324178     412.1 ns/op      96 B/op     6 allocs/op
+Benchmark/mexpr-field-slow-12           3673572       286.5 ns/op    144 B/op      6 allocs/op
+Benchmark/_expr-field-slow-12            956689      1276 ns/op     1096 B/op     23 allocs/op
+
+Benchmark/mexpr-comparison-slow-12      1000000      1020 ns/op      656 B/op     16 allocs/op
+Benchmark/_expr-comparison-slow-12       383491      3069 ns/op     2224 B/op     38 allocs/op
+
+Benchmark/mexpr-logical-slow-12         1000000      1063 ns/op      464 B/op     17 allocs/op
+Benchmark/_expr-logical-slow-12          292824      4148 ns/op     2336 B/op     38 allocs/op
+
+Benchmark/mexpr-math-slow-12            1000000      1035 ns/op      656 B/op     16 allocs/op
+Benchmark/_expr-math-slow-12             399708      3004 ns/op     2184 B/op     38 allocs/op
+
+Benchmark/mexpr-string-slow-12          1822945       655.6 ns/op    258 B/op     10 allocs/op
+Benchmark/_expr-string-slow-12           428604      2508 ns/op     1640 B/op     35 allocs/op
+
+Benchmark/mexpr-index-slow-12           2015856       592.0 ns/op    280 B/op     10 allocs/op
+Benchmark/_expr-index-slow-12            517360      2301 ns/op     1872 B/op     30 allocs/op
+
+Benchmark/mexpr-complex-slow-12          244039      5078 ns/op     2232 B/op     64 allocs/op
+Benchmark/_expr-complex-slow-12           69387     16825 ns/op    14378 B/op    107 allocs/op
+
+Benchmark/mexpr-field-cached-12       100000000        11.37 ns/op     0 B/op      0 allocs/op
+Benchmark/_expr-field-cached-12         7761153       146.5 ns/op     48 B/op      2 allocs/op
+
+Benchmark/mexpr-comparison-cached-12   38098502        30.93 ns/op     0 B/op      0 allocs/op
+Benchmark/_expr-comparison-cached-12    4563463       251.0 ns/op     64 B/op      3 allocs/op
+
+Benchmark/mexpr-logical-cached-12      37563720        31.35 ns/op     0 B/op      0 allocs/op
+Benchmark/_expr-logical-cached-12      11000991       105.9 ns/op     32 B/op      1 allocs/op
+
+Benchmark/mexpr-math-cached-12         24463279        47.41 ns/op     8 B/op      1 allocs/op
+Benchmark/_expr-math-cached-12          4531693       268.0 ns/op     72 B/op      4 allocs/op
+
+Benchmark/mexpr-string-cached-12       43399368        26.83 ns/op     0 B/op      0 allocs/op
+Benchmark/_expr-string-cached-12        7302940       162.0 ns/op     48 B/op      2 allocs/op
+
+Benchmark/mexpr-index-cached-12        45289230        25.67 ns/op     0 B/op      0 allocs/op
+Benchmark/_expr-index-cached-12         6057562       180.0 ns/op     48 B/op      2 allocs/op
+
+Benchmark/mexpr-complex-cached-12       4271955       278.7 ns/op     40 B/op      3 allocs/op
+Benchmark/_expr-complex-cached-12       1456266       818.7 ns/op    208 B/op      9 allocs/op
+
 ```
 
+On average mexpr is around 3-10x faster for both full parsing and cached performance.
+
 ## References
 
 These were a big help in understanding how Pratt parsers work:

diff --git a/conversions.go b/conversions.go
@@ -39,7 +39,7 @@ func toNumber(ast *Node, v interface{}) (float64, Error) {
 	case float32:
 		return float64(n), nil
 	}
-	return 0, NewError(ast.Offset, "unable to convert to number")
+	return 0, NewError(ast.Offset, ast.Length, "unable to convert to number")
 }
 
 func isString(v interface{}) bool {
@@ -110,3 +110,37 @@ func toBool(v interface{}) bool {
 	}
 	return false
 }
+
+// normalize an input for equality checks. All numbers -> float64, []byte to
+// string, etc. Since `rune` is an alias for int32, we can't differentiate it
+// for comparison with strings.
+func normalize(v interface{}) interface{} {
+	switch n := v.(type) {
+	case int:
+		return float64(n)
+	case int8:
+		return float64(n)
+	case int16:
+		return float64(n)
+	case int32:
+		return float64(n)
+	case int64:
+		return float64(n)
+	case uint:
+		return float64(n)
+	case uint8:
+		return float64(n)
+	case uint16:
+		return float64(n)
+	case uint32:
+		return float64(n)
+	case uint64:
+		return float64(n)
+	case float32:
+		return float64(n)
+	case []byte:
+		return string(n)
+	}
+
+	return v
+}
diff --git a/error.go b/error.go
@@ -7,39 +7,50 @@ type Error interface {
 	Error() string
 
 	// Offset returns the character offset of the error within the expression.
-	Offset() int
+	Offset() uint16
+
+	// Length returns the number of bytes the error spans, starting at the offset.
+	Length() uint8
 
 	// Pretty prints out a message with a pointer to the source location of the
 	// error.
 	Pretty(source string) string
 }
 
 type exprErr struct {
-	offset  int
+	offset  uint16
+	length  uint8
 	message string
 }
 
 func (e *exprErr) Error() string {
 	return e.message
 }
 
-func (e *exprErr) Offset() int {
+func (e *exprErr) Offset() uint16 {
 	return e.offset
 }
 
+func (e *exprErr) Length() uint8 {
+	return e.length
+}
+
 func (e *exprErr) Pretty(source string) string {
 	msg := e.Error() + "\n" + source + "\n"
-	for i := 0; i < e.offset; i++ {
+	for i := uint16(0); i < e.offset; i++ {
 		msg += "."
 	}
-	msg += "^"
+	for i := uint8(0); i < e.length; i++ {
+		msg += "^"
+	}
 	return msg
 }
 
 // NewError creates a new error at a specific location.
-func NewError(offset int, format string, a ...interface{}) Error {
+func NewError(offset uint16, length uint8, format string, a ...interface{}) Error {
 	return &exprErr{
 		offset:  offset,
+		length:  length,
 		message: fmt.Sprintf(format, a...),
 	}
 }
diff --git a/interpreter.go b/interpreter.go
@@ -50,10 +50,10 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 		if ast.Value.(string) == "length" {
 			// Special pseudo-property to get the value's length.
 			if s, ok := value.(string); ok {
-				return float64(len(s)), nil
+				return len(s), nil
 			}
 			if a, ok := value.([]interface{}); ok {
-				return float64(len(a)), nil
+				return len(a), nil
 			}
 		}
 		if m, ok := value.(map[string]interface{}); ok {
@@ -64,7 +64,7 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 				return v, nil
 			}
 		}
-		return nil, NewError(ast.Offset, "cannot get %v from %v", ast.Value, value)
+		return nil, NewError(ast.Offset, ast.Length, "cannot get %v from %v", ast.Value, value)
 	case NodeFieldSelect:
 		leftValue, err := i.run(ast.Left, value)
 		if err != nil {
@@ -77,7 +77,7 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			return nil, err
 		}
 		if !isSlice(resultLeft) && !isString(resultLeft) {
-			return nil, NewError(ast.Offset, "can only index strings or arrays but got %v", resultLeft)
+			return nil, NewError(ast.Offset, ast.Length, "can only index strings or arrays but got %v", resultLeft)
 		}
 		resultRight, err := i.run(ast.Right, value)
 		if err != nil {
@@ -94,19 +94,19 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			}
 			if left, ok := resultLeft.([]interface{}); ok {
 				if start < 0 {
-					start = float64(len(left) + int(start))
+					start += float64(len(left))
 				}
 				if end < 0 {
-					end = float64(len(left) + int(end))
+					end += float64(len(left))
 				}
 				return left[int(start) : int(end)+1], nil
 			}
 			left := toString(resultLeft)
 			if start < 0 {
-				start = float64(len(left) + int(start))
+				start += float64(len(left))
 			}
 			if end < 0 {
-				end = float64(len(left) + int(end))
+				end += float64(len(left))
 			}
 			return left[int(start) : int(end)+1], nil
 		}
@@ -117,17 +117,17 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			}
 			if left, ok := resultLeft.([]interface{}); ok {
 				if idx < 0 {
-					idx = float64(len(left) + int(idx))
+					idx += float64(len(left))
 				}
 				return left[int(idx)], nil
 			}
 			left := toString(resultLeft)
 			if idx < 0 {
-				idx = float64(len(left) + int(idx))
+				idx += float64(len(left))
 			}
 			return string(left[int(idx)]), nil
 		}
-		return nil, NewError(ast.Offset, "array index must be number or slice %v", resultRight)
+		return nil, NewError(ast.Offset, ast.Length, "array index must be number or slice %v", resultRight)
 	case NodeSlice:
 		resultLeft, err := i.run(ast.Left, value)
 		if err != nil {
@@ -137,7 +137,9 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 		if err != nil {
 			return nil, err
 		}
-		return []interface{}{resultLeft, resultRight}, nil
+		ast.Value.([]interface{})[0] = resultLeft
+		ast.Value.([]interface{})[1] = resultRight
+		return ast.Value, nil
 	case NodeLiteral:
 		return ast.Value, nil
 	case NodeSign:
@@ -153,7 +155,7 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			right = -right
 		}
 		return right, nil
-	case NodeAdd, NodeSubtract, NodeMultiply, NodeDivide, NodePower:
+	case NodeAdd, NodeSubtract, NodeMultiply, NodeDivide, NodeModulus, NodePower:
 		resultLeft, err := i.run(ast.Left, value)
 		if err != nil {
 			return nil, err
@@ -172,11 +174,11 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			}
 		}
 		if isNumber(resultLeft) && isNumber(resultRight) {
-			left, err := toNumber(ast, resultLeft)
+			left, err := toNumber(ast.Left, resultLeft)
 			if err != nil {
 				return nil, err
 			}
-			right, err := toNumber(ast, resultRight)
+			right, err := toNumber(ast.Right, resultRight)
 			if err != nil {
 				return nil, err
 			}
@@ -188,14 +190,17 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			case NodeMultiply:
 				return left * right, nil
 			case NodeDivide:
+				if right == 0.0 {
+					return nil, NewError(ast.Offset, ast.Length, "cannot divide by zero")
+				}
 				return left / right, nil
 			case NodeModulus:
-				return float64(int(left) % int(right)), nil
+				return int(left) % int(right), nil
 			case NodePower:
 				return math.Pow(left, right), nil
 			}
 		}
-		return nil, NewError(ast.Offset, "cannot add incompatible types %v and %v", resultLeft, resultRight)
+		return nil, NewError(ast.Offset, ast.Length, "cannot add incompatible types %v and %v", resultLeft, resultRight)
 	case NodeEqual, NodeNotEqual, NodeLessThan, NodeLessThanEqual, NodeGreaterThan, NodeGreaterThanEqual:
 		resultLeft, err := i.run(ast.Left, value)
 		if err != nil {
@@ -206,17 +211,17 @@ func (i *interpreter) run(ast *Node, value interface{}) (interface{}, Error) {
 			return nil, err
 		}
 		if ast.Type == NodeEqual {
-			return resultLeft == resultRight, nil
+			return normalize(resultLeft) == normalize(resultRight), nil
 		}
 		if ast.Type == NodeNotEqual {
-			return resultLeft != resultRight, nil
+			return normalize(resultLeft) != normalize(resultRight), nil
 		}
 
-		left, err := toNumber(ast, resultLeft)
+		left, err := toNumber(ast.Left, resultLeft)
 		if err != nil {
 			return nil, err
 		}
-		right, err := toNumber(ast, resultRight)
+		right, err := toNumber(ast.Right, resultRight)
 		if err != nil {
 			return nil, err
 		}