Files
chaisql/internal/sql/parser/expr.go
2021-07-02 18:41:33 +04:00

675 lines
17 KiB
Go

package parser
import (
"strconv"
"github.com/genjidb/genji/document"
"github.com/genjidb/genji/internal/environment"
"github.com/genjidb/genji/internal/expr"
"github.com/genjidb/genji/internal/sql/scanner"
"github.com/genjidb/genji/internal/stringutil"
)
// dummyOperator is a placeholder operator. parseExprWithMinPrecedence uses it
// as the temporary root of the expression tree it builds, so only the
// right-hand side accessors are functional; every other method panics.
type dummyOperator struct {
	rightHand expr.Expr
}

// Token is not supported by the placeholder and panics.
func (d *dummyOperator) Token() scanner.Token {
	panic("not implemented")
}

// Equal is not supported by the placeholder and panics.
func (d *dummyOperator) Equal(expr.Expr) bool {
	panic("not implemented")
}

// Eval is not supported by the placeholder and panics.
func (d *dummyOperator) Eval(*environment.Environment) (document.Value, error) {
	panic("not implemented")
}

// String is not supported by the placeholder and panics.
func (d *dummyOperator) String() string {
	panic("not implemented")
}

// Precedence is not supported by the placeholder and panics.
func (d *dummyOperator) Precedence() int {
	panic("not implemented")
}

// LeftHand is not supported by the placeholder and panics.
func (d *dummyOperator) LeftHand() expr.Expr {
	panic("not implemented")
}

// RightHand returns the expression currently stored on the right-hand side.
func (d *dummyOperator) RightHand() expr.Expr {
	return d.rightHand
}

// SetLeftHandExpr is not supported by the placeholder and panics.
func (d *dummyOperator) SetLeftHandExpr(expr.Expr) {
	panic("not implemented")
}

// SetRightHandExpr replaces the expression stored on the right-hand side.
func (d *dummyOperator) SetRightHandExpr(rhs expr.Expr) {
	d.rightHand = rhs
}
// ParseExpr parses an expression, accepting operators of any precedence
// (the minimum precedence is 0).
func (p *Parser) ParseExpr() (e expr.Expr, err error) {
	e, err = p.parseExprWithMinPrecedence(0)
	return e, err
}
// parseExprWithMinPrecedence parses an expression made of unary expressions
// joined by binary operators. The precedence and allowed arguments are
// forwarded to parseOperator and parseUnaryExpr; parsing stops as soon as
// parseOperator reports no operator (token 0).
func (p *Parser) parseExprWithMinPrecedence(precedence int, allowed ...scanner.Token) (e expr.Expr, err error) {
	// Every expression starts with a non-binary expression.
	operand, err := p.parseUnaryExpr(allowed...)
	if err != nil {
		return nil, err
	}

	// A dummy root holds the tree on its right-hand side so that the real
	// top-level node can be swapped like any other node.
	var root expr.Operator = &dummyOperator{rightHand: operand}

	for {
		// No operator ahead: the tree is complete.
		build, tok, err := p.parseOperator(precedence, allowed...)
		if err != nil {
			return nil, err
		}
		if tok == 0 {
			return root.RightHand(), nil
		}

		rhs, err := p.parseUnaryExpr(allowed...)
		if err != nil {
			return nil, err
		}

		// Walk down the right-hand spine until the child is either not an
		// operator or an operator whose precedence is >= that of the new
		// one; the new operator takes that child's place.
		parent := root
		for {
			child, isOperator := parent.RightHand().(expr.Operator)
			if !isOperator || child.Precedence() >= tok.Precedence() {
				break
			}
			parent = child
		}
		parent.SetRightHandExpr(build(parent.RightHand(), rhs))
	}
}
// parseOperator scans the next token and, when it is a supported binary
// operator, returns a constructor that builds the operator expression from
// its two operands, together with the operator token.
//
// It returns (nil, 0, nil) after pushing the token back when the token is not
// an operator, is not in allowed, is a regex operator (currently unused), has
// a precedence below minPrecedence, or has no constructor in this function.
// NOT must be followed by IN or LIKE, otherwise a parse error is returned.
func (p *Parser) parseOperator(minPrecedence int, allowed ...scanner.Token) (func(lhs, rhs expr.Expr) expr.Expr, scanner.Token, error) {
	op, _, _ := p.ScanIgnoreWhitespace()

	// Tokens that cannot start a binary operation here are left for the caller.
	if (!op.IsOperator() && op != scanner.NOT) ||
		!tokenIsAllowed(op, allowed...) ||
		op == scanner.EQREGEX || op == scanner.NEQREGEX {
		p.Unscan()
		return nil, 0, nil
	}

	// NOT is only a binary operator as part of NOT IN / NOT LIKE.
	if op == scanner.NOT {
		next, pos, lit := p.ScanIgnoreWhitespace()
		if next.Precedence() >= minPrecedence {
			switch next {
			case scanner.IN:
				return expr.NotIn, op, nil
			case scanner.LIKE:
				return expr.NotLike, op, nil
			}
		}
		return nil, 0, newParseError(scanner.Tokstr(next, lit), []string{"IN, LIKE"}, pos)
	}

	if op.Precedence() < minPrecedence {
		p.Unscan()
		return nil, 0, nil
	}

	var build func(lhs, rhs expr.Expr) expr.Expr
	switch op {
	case scanner.EQ:
		build = expr.Eq
	case scanner.NEQ:
		build = expr.Neq
	case scanner.GT:
		build = expr.Gt
	case scanner.GTE:
		build = expr.Gte
	case scanner.LT:
		build = expr.Lt
	case scanner.LTE:
		build = expr.Lte
	case scanner.AND:
		build = expr.And
	case scanner.OR:
		build = expr.Or
	case scanner.ADD:
		build = expr.Add
	case scanner.SUB:
		build = expr.Sub
	case scanner.MUL:
		build = expr.Mul
	case scanner.DIV:
		build = expr.Div
	case scanner.MOD:
		build = expr.Mod
	case scanner.BITWISEAND:
		build = expr.BitwiseAnd
	case scanner.BITWISEOR:
		build = expr.BitwiseOr
	case scanner.BITWISEXOR:
		build = expr.BitwiseXor
	case scanner.IN:
		build = expr.In
	case scanner.IS:
		// IS may be followed by NOT; any other token is pushed back.
		if next, _, _ := p.ScanIgnoreWhitespace(); next == scanner.NOT {
			build = expr.IsNot
		} else {
			p.Unscan()
			build = expr.Is
		}
	case scanner.LIKE:
		build = expr.Like
	case scanner.CONCAT:
		build = expr.Concat
	case scanner.BETWEEN:
		// BETWEEN a AND b: the first bound and the AND keyword are consumed
		// here; the caller parses the second bound as the right operand.
		lower, err := p.parseExprWithMinPrecedence(op.Precedence())
		if err != nil {
			return nil, op, err
		}
		if err := p.parseTokens(scanner.AND); err != nil {
			return nil, op, err
		}
		build = expr.Between(lower)
	default:
		p.Unscan()
		return nil, 0, nil
	}

	return build, op, nil
}
// parseUnaryExpr parses a non-binary expression: CAST, a path or function
// call, a named or positional parameter, a literal, a document, a bracketed
// list, a parenthesized expression or expression list, a NOT expression, or
// NEXT VALUE FOR.
//
// If the next token is not accepted by tokenIsAllowed, it is pushed back and
// (nil, nil) is returned. On error the returned expression is always a nil
// interface value.
func (p *Parser) parseUnaryExpr(allowed ...scanner.Token) (expr.Expr, error) {
	tok, pos, lit := p.ScanIgnoreWhitespace()
	if !tokenIsAllowed(tok, allowed...) {
		p.Unscan()
		return nil, nil
	}

	switch tok {
	case scanner.CAST:
		p.Unscan()
		return p.parseCastExpression()
	case scanner.IDENT:
		// An identifier immediately followed by a left parenthesis is a
		// function call, anything else is a path. Both tokens are pushed
		// back so the dedicated parser reads the identifier itself.
		next, _, _ := p.Scan()
		p.Unscan()
		p.Unscan()
		if next == scanner.LPAREN {
			return p.parseFunction()
		}
		field, err := p.parsePath()
		if err != nil {
			return nil, err
		}
		return expr.Path(field), nil
	case scanner.NAMEDPARAM:
		// The literal includes the one-character prefix, so a length of 1
		// means no name was given.
		if len(lit) == 1 {
			return nil, &ParseError{Message: "missing param name", Pos: pos}
		}
		if p.orderedParams > 0 {
			return nil, &ParseError{Message: "cannot mix positional arguments with named arguments", Pos: pos}
		}
		p.namedParams++
		return expr.NamedParam(lit[1:]), nil
	case scanner.POSITIONALPARAM:
		if p.namedParams > 0 {
			return nil, &ParseError{Message: "cannot mix positional arguments with named arguments", Pos: pos}
		}
		p.orderedParams++
		return expr.PositionalParam(p.orderedParams), nil
	case scanner.STRING:
		return expr.LiteralValue(document.NewTextValue(lit)), nil
	case scanner.NUMBER:
		v, err := strconv.ParseFloat(lit, 64)
		if err != nil {
			return nil, &ParseError{Message: "unable to parse number", Pos: pos}
		}
		return expr.LiteralValue(document.NewDoubleValue(v)), nil
	case scanner.INTEGER:
		v, err := strconv.ParseInt(lit, 10, 64)
		if err != nil {
			// The literal may be too large to fit into an int64, parse as Float64
			if f, err := strconv.ParseFloat(lit, 64); err == nil {
				return expr.LiteralValue(document.NewDoubleValue(f)), nil
			}
			return nil, &ParseError{Message: "unable to parse integer", Pos: pos}
		}
		return expr.LiteralValue(document.NewIntegerValue(v)), nil
	case scanner.TRUE, scanner.FALSE:
		return expr.LiteralValue(document.NewBoolValue(tok == scanner.TRUE)), nil
	case scanner.NULL:
		return expr.LiteralValue(document.NewNullValue()), nil
	case scanner.LBRACKET:
		p.Unscan()
		// Return an explicit nil on failure: forwarding the nil *expr.KVPairs
		// would wrap it in a non-nil expr.Expr interface value.
		doc, err := p.ParseDocument()
		if err != nil {
			return nil, err
		}
		return doc, nil
	case scanner.LSBRACKET:
		p.Unscan()
		// Same as above: avoid wrapping a nil list in a non-nil interface.
		list, err := p.parseExprList(scanner.LSBRACKET, scanner.RSBRACKET)
		if err != nil {
			return nil, err
		}
		return list, nil
	case scanner.LPAREN:
		e, err := p.ParseExpr()
		if err != nil {
			return nil, err
		}

		// ")" closes a parenthesized expression, "," turns it into the
		// first element of an expression list.
		next, nextPos, nextLit := p.ScanIgnoreWhitespace()
		switch next {
		case scanner.RPAREN:
			return expr.Parentheses{E: e}, nil
		case scanner.COMMA:
			exprList, err := p.parseExprListUntil(scanner.RPAREN)
			if err != nil {
				return nil, err
			}
			// prepend first parsed expression
			exprList = append([]expr.Expr{e}, exprList...)
			return exprList, nil
		}
		return nil, newParseError(scanner.Tokstr(next, nextLit), []string{")", ","}, nextPos)
	case scanner.NOT:
		e, err := p.ParseExpr()
		if err != nil {
			return nil, err
		}
		return expr.Not(e), nil
	case scanner.NEXT:
		// NEXT VALUE FOR <sequence name>
		if err := p.parseTokens(scanner.VALUE, scanner.FOR); err != nil {
			return nil, err
		}
		seqName, err := p.parseIdent()
		if err != nil {
			return nil, err
		}
		return expr.NextValueFor{SeqName: seqName}, nil
	default:
		return nil, newParseError(scanner.Tokstr(tok, lit), nil, pos)
	}
}
// parseInteger scans the next non-whitespace token and returns its value as
// an int64. A parse error is returned when the token is not an INTEGER or
// when its literal cannot be converted to a base-10 int64.
func (p *Parser) parseInteger() (int64, error) {
	tok, pos, lit := p.ScanIgnoreWhitespace()
	if tok == scanner.INTEGER {
		if n, err := strconv.ParseInt(lit, 10, 64); err == nil {
			return n, nil
		}
		return 0, newParseError(scanner.Tokstr(tok, lit), []string{"INT"}, pos)
	}
	return 0, newParseError(scanner.Tokstr(tok, lit), []string{"integer"}, pos)
}
// parseIdent scans the next non-whitespace token and returns its literal if
// it is an identifier; any other token yields a parse error.
func (p *Parser) parseIdent() (string, error) {
	switch tok, pos, lit := p.ScanIgnoreWhitespace(); tok {
	case scanner.IDENT:
		return lit, nil
	default:
		return "", newParseError(scanner.Tokstr(tok, lit), []string{"identifier"}, pos)
	}
}
// parseIdentList parses one or more identifiers separated by commas. The
// first token that is not a comma ends the list and is pushed back.
func (p *Parser) parseIdentList() ([]string, error) {
	// At least one identifier is mandatory.
	first, err := p.parseIdent()
	if err != nil {
		return nil, err
	}
	idents := []string{first}

	// Each following comma must introduce another identifier.
	for {
		tok, _, _ := p.ScanIgnoreWhitespace()
		if tok != scanner.COMMA {
			p.Unscan()
			break
		}

		next, err := p.parseIdent()
		if err != nil {
			return nil, err
		}
		idents = append(idents, next)
	}

	return idents, nil
}
// parseParam parses a positional or named param.
//
// Named and positional params cannot be mixed within the same parser; doing
// so, or using a named param with an empty name, returns a *ParseError that
// carries the position of the offending token. When the next token is not a
// param, (nil, nil) is returned and the scanned token is not pushed back.
func (p *Parser) parseParam() (expr.Expr, error) {
	tok, pos, lit := p.ScanIgnoreWhitespace()
	switch tok {
	case scanner.NAMEDPARAM:
		// The literal includes the one-character prefix, so a length of 1
		// means no name was given.
		if len(lit) == 1 {
			return nil, &ParseError{Message: "missing param name", Pos: pos}
		}
		if p.orderedParams > 0 {
			return nil, &ParseError{Message: "cannot mix positional arguments with named arguments", Pos: pos}
		}
		p.namedParams++
		return expr.NamedParam(lit[1:]), nil
	case scanner.POSITIONALPARAM:
		if p.namedParams > 0 {
			return nil, &ParseError{Message: "cannot mix positional arguments with named arguments", Pos: pos}
		}
		p.orderedParams++
		return expr.PositionalParam(p.orderedParams), nil
	default:
		return nil, nil
	}
}
// parseType scans a type name and returns the matching document value type.
// DOUBLE may optionally be followed by PRECISION, and VARCHAR / CHARACTER
// must be followed by a parenthesized integer whose value is ignored. Any
// other token yields a parse error.
func (p *Parser) parseType() (document.ValueType, error) {
	tok, pos, lit := p.ScanIgnoreWhitespace()

	switch tok {
	case scanner.TYPEARRAY:
		return document.ArrayValue, nil
	case scanner.TYPEBLOB, scanner.TYPEBYTES:
		return document.BlobValue, nil
	case scanner.TYPEBOOL:
		return document.BoolValue, nil
	case scanner.TYPEDOCUMENT:
		return document.DocumentValue, nil
	case scanner.TYPEREAL:
		return document.DoubleValue, nil
	case scanner.TYPEDOUBLE:
		// Consume an optional PRECISION keyword, push back anything else.
		if next, _, _ := p.ScanIgnoreWhitespace(); next != scanner.PRECISION {
			p.Unscan()
		}
		return document.DoubleValue, nil
	case scanner.TYPEINTEGER, scanner.TYPEINT, scanner.TYPEINT2, scanner.TYPEINT8,
		scanner.TYPETINYINT, scanner.TYPEBIGINT, scanner.TYPEMEDIUMINT, scanner.TYPESMALLINT:
		return document.IntegerValue, nil
	case scanner.TYPETEXT:
		return document.TextValue, nil
	case scanner.TYPEVARCHAR, scanner.TYPECHARACTER:
		// "(" INTEGER ")" is mandatory; the value between parentheses is
		// not used.
		sequence := []struct {
			tok      scanner.Token
			expected string
		}{
			{scanner.LPAREN, "("},
			{scanner.INTEGER, "integer"},
			{scanner.RPAREN, ")"},
		}
		for _, want := range sequence {
			got, gotPos, gotLit := p.ScanIgnoreWhitespace()
			if got != want.tok {
				return 0, newParseError(scanner.Tokstr(got, gotLit), []string{want.expected}, gotPos)
			}
		}
		return document.TextValue, nil
	}

	return 0, newParseError(scanner.Tokstr(tok, lit), []string{"type"}, pos)
}
// ParseDocument parses a document: a list of comma-separated key-value pairs
// enclosed in the LBRACKET and RBRACKET tokens. A key that appears twice is
// rejected with a "duplicate field" error.
func (p *Parser) ParseDocument() (*expr.KVPairs, error) {
	// Opening bracket.
	if err := p.parseTokens(scanner.LBRACKET); err != nil {
		return nil, err
	}

	doc := expr.KVPairs{SelfReferenced: true}
	seen := make(map[string]struct{})

	for {
		// A pair that fails to parse ends the list; its error is dropped and
		// the closing bracket check below decides whether the document is
		// valid.
		kv, err := p.parseKV()
		if err != nil {
			p.Unscan()
			break
		}

		if _, dup := seen[kv.K]; dup {
			return nil, stringutil.Errorf("duplicate field %q", kv.K)
		}
		seen[kv.K] = struct{}{}
		doc.Pairs = append(doc.Pairs, kv)

		// Only a comma allows another pair to follow.
		if tok, _, _ := p.ScanIgnoreWhitespace(); tok != scanner.COMMA {
			p.Unscan()
			break
		}
	}

	// Closing bracket.
	if err := p.parseTokens(scanner.RBRACKET); err != nil {
		return nil, err
	}

	return &doc, nil
}
// parseKV parses a key-value pair written as KEY : Expr, where KEY is either
// an identifier or a string literal.
func (p *Parser) parseKV() (expr.KVPair, error) {
	tok, pos, lit := p.ScanIgnoreWhitespace()
	if tok != scanner.IDENT && tok != scanner.STRING {
		return expr.KVPair{}, newParseError(scanner.Tokstr(tok, lit), []string{"ident", "string"}, pos)
	}

	// The key must be followed by a colon; on failure one token is pushed
	// back before reporting the error.
	if err := p.parseTokens(scanner.COLON); err != nil {
		p.Unscan()
		return expr.KVPair{}, err
	}

	value, err := p.ParseExpr()
	if err != nil {
		return expr.KVPair{}, err
	}

	return expr.KVPair{K: lit, V: value}, nil
}
// parsePath parses a path to a specific value: a mandatory identifier
// followed by any number of ".field" or "[index]" fragments. Fragments must
// directly follow each other, since the raw scanner is used instead of the
// whitespace-skipping one. The first token that cannot extend the path is
// pushed back.
//
// Parse errors describe the unexpected token with scanner.Tokstr, like the
// other parse errors in this file.
func (p *Parser) parsePath() (document.Path, error) {
	var path document.Path

	// parse first mandatory ident
	chunk, err := p.parseIdent()
	if err != nil {
		return nil, err
	}
	path = append(path, document.PathFragment{
		FieldName: chunk,
	})

	for {
		// scan the very next token.
		// it can be either a '.' or a '['
		// Otherwise, unscan and return the path
		tok, _, _ := p.Scan()
		switch tok {
		case scanner.DOT:
			// scan the next token for an ident
			tok, pos, lit := p.Scan()
			if tok != scanner.IDENT {
				return nil, newParseError(scanner.Tokstr(tok, lit), []string{"identifier"}, pos)
			}
			path = append(path, document.PathFragment{
				FieldName: lit,
			})
		case scanner.LSBRACKET:
			// scan the next token for a non-negative integer
			tok, pos, lit := p.Scan()
			if tok != scanner.INTEGER || lit[0] == '-' {
				return nil, newParseError(scanner.Tokstr(tok, lit), []string{"array index"}, pos)
			}
			idx, err := strconv.Atoi(lit)
			if err != nil {
				return nil, newParseError(scanner.Tokstr(tok, lit), []string{"integer"}, pos)
			}
			path = append(path, document.PathFragment{
				ArrayIndex: idx,
			})
			// scan the next token for a closing right bracket
			if err := p.parseTokens(scanner.RSBRACKET); err != nil {
				return nil, err
			}
		default:
			p.Unscan()
			return path, nil
		}
	}
}
// parseExprListUntil parses comma-separated expressions followed by the
// required rightToken. The opening token is expected to have been consumed
// already.
func (p *Parser) parseExprListUntil(rightToken scanner.Token) (expr.LiteralExprList, error) {
	var list expr.LiteralExprList

	for {
		// An expression that fails to parse ends the list; its error is
		// dropped and the closing token check below decides whether the
		// list is valid.
		item, err := p.ParseExpr()
		if err != nil {
			p.Unscan()
			break
		}
		list = append(list, item)

		// Only a comma allows another expression to follow.
		if tok, _, _ := p.ScanIgnoreWhitespace(); tok != scanner.COMMA {
			p.Unscan()
			break
		}
	}

	// Closing ) or ] token.
	if err := p.parseTokens(rightToken); err != nil {
		return nil, err
	}

	return list, nil
}
// parseExprList parses a list of expressions delimited by leftToken and
// rightToken, e.g. ( ... ) or [ ... ].
func (p *Parser) parseExprList(leftToken, rightToken scanner.Token) (expr.LiteralExprList, error) {
	// The opening token is mandatory.
	err := p.parseTokens(leftToken)
	if err != nil {
		return nil, err
	}

	// The elements and the closing token are handled by parseExprListUntil.
	return p.parseExprListUntil(rightToken)
}
// parseFunction parses a function call.
// A function is an identifier followed by a parenthesis,
// an optional comma-separated list of expressions and a closing parenthesis.
// The wildcard form NAME(*) produces a wildcard COUNT; every other call is
// resolved through p.functions.GetFunc.
func (p *Parser) parseFunction() (expr.Expr, error) {
	// Parse function name.
	fname, err := p.parseIdent()
	if err != nil {
		return nil, err
	}

	// Parse required ( token.
	if err := p.parseTokens(scanner.LPAREN); err != nil {
		return nil, err
	}

	// Special case: support COUNT(*).
	// NOTE(review): the function name is not checked here, so NAME(*) is
	// turned into a wildcard COUNT for any NAME — confirm whether this
	// should be restricted to COUNT.
	if tok, _, _ := p.ScanIgnoreWhitespace(); tok == scanner.MUL {
		// Report the token that follows "*" with its own literal and
		// position, not those of the "*" token.
		next, pos, lit := p.ScanIgnoreWhitespace()
		if next != scanner.RPAREN {
			return nil, newParseError(scanner.Tokstr(next, lit), []string{")"}, pos)
		}
		return &expr.CountFunc{Wildcard: true}, nil
	}
	p.Unscan()

	// Check if the function is called without arguments.
	if tok, _, _ := p.ScanIgnoreWhitespace(); tok == scanner.RPAREN {
		return p.functions.GetFunc(fname)
	}
	p.Unscan()

	var exprs []expr.Expr

	// Parse expressions.
	for {
		e, err := p.ParseExpr()
		if err != nil {
			return nil, err
		}
		exprs = append(exprs, e)

		if tok, _, _ := p.ScanIgnoreWhitespace(); tok != scanner.COMMA {
			p.Unscan()
			break
		}
	}

	// Parse required ) token.
	if err := p.parseTokens(scanner.RPAREN); err != nil {
		return nil, err
	}

	return p.functions.GetFunc(fname, exprs...)
}
// parseCastExpression parses CAST(expr AS type) and returns the matching
// expr.CastFunc. Every element of the syntax is mandatory.
func (p *Parser) parseCastExpression() (expr.Expr, error) {
	// CAST (
	err := p.parseTokens(scanner.CAST, scanner.LPAREN)
	if err != nil {
		return nil, err
	}

	// The expression to convert.
	operand, err := p.ParseExpr()
	if err != nil {
		return nil, err
	}

	// AS
	err = p.parseTokens(scanner.AS)
	if err != nil {
		return nil, err
	}

	// The target type name.
	target, err := p.parseType()
	if err != nil {
		return nil, err
	}

	// )
	err = p.parseTokens(scanner.RPAREN)
	if err != nil {
		return nil, err
	}

	return expr.CastFunc{Expr: operand, CastAs: target}, nil
}
// tokenIsAllowed reports whether tok may be used. A nil allowed list places
// no restriction; otherwise tok must be equal to one of its elements.
func tokenIsAllowed(tok scanner.Token, allowed ...scanner.Token) bool {
	ok := allowed == nil
	for i := 0; !ok && i < len(allowed); i++ {
		ok = allowed[i] == tok
	}
	return ok
}