Files
chaisql/internal/sql/scanner/scanner_test.go
2025-09-21 11:28:55 +05:30

322 lines
9.1 KiB
Go

package scanner
import (
"reflect"
"strings"
"testing"
)
// TestScanner_Scan feeds each input to a fresh scanner and inspects only the
// first token returned by Scan: its type, its position and its literal.
// Cases that leave lit or pos unset expect the zero value for that field.
func TestScanner_Scan(t *testing.T) {
	cases := []struct {
		s   string
		tok Token
		lit string
		pos Pos
	}{
		// Special tokens (EOF, ILLEGAL, WS)
		{s: ``, tok: EOF},
		{s: `#`, tok: ILLEGAL, lit: `#`},
		{s: ` `, tok: WS, lit: " "},
		{s: "\t", tok: WS, lit: "\t"},
		{s: "\n", tok: WS, lit: "\n"},
		{s: "\r", tok: WS, lit: "\n"},
		{s: "\r\n", tok: WS, lit: "\n"},
		{s: "\rX", tok: WS, lit: "\n"},
		{s: "\n\r", tok: WS, lit: "\n\n"},
		{s: " \n\t \r\n\t", tok: WS, lit: " \n\t \n\t"},
		{s: " foo", tok: WS, lit: " "},

		// Numeric operators
		{s: `+`, tok: ADD},
		{s: `-`, tok: SUB},
		{s: `*`, tok: MUL},
		{s: `/`, tok: DIV},
		{s: `%`, tok: MOD},

		// Logical operators
		{s: `AND`, tok: AND},
		{s: `and`, tok: AND},
		{s: `OR`, tok: OR},
		{s: `or`, tok: OR},

		// Comparison operators
		{s: `=`, tok: EQ},
		{s: `==`, tok: EQ},
		{s: `<>`, tok: NEQ},
		{s: `! `, tok: ILLEGAL, lit: "!"},
		{s: `<`, tok: LT},
		{s: `<=`, tok: LTE},
		{s: `>`, tok: GT},
		{s: `>=`, tok: GTE},
		{s: `IN`, tok: IN},
		{s: `IS`, tok: IS},
		{s: `LIKE`, tok: LIKE},
		{s: `||`, tok: CONCAT},

		// Misc tokens
		{s: `(`, tok: LPAREN},
		{s: `)`, tok: RPAREN},
		{s: `{`, tok: LBRACKET},
		{s: `}`, tok: RBRACKET},
		{s: `[`, tok: LSBRACKET},
		{s: `]`, tok: RSBRACKET},
		{s: `,`, tok: COMMA},
		{s: `;`, tok: SEMICOLON},
		{s: `.`, tok: DOT},
		{s: `=~`, tok: EQREGEX},
		{s: `!~`, tok: NEQREGEX},
		{s: `:`, tok: COLON},
		{s: `::`, tok: DOUBLECOLON},
		{s: `--`, tok: COMMENT},
		{s: `--10.3`, tok: COMMENT, lit: ``},

		// Identifiers
		{s: `foo`, tok: IDENT, lit: `foo`},
		{s: `_foo`, tok: IDENT, lit: `_foo`},
		{s: `Zx12_3U_-`, tok: IDENT, lit: `Zx12_3U_`},
		{s: `"foo"`, tok: IDENT, lit: "foo"},
		{s: `"foo\\bar"`, tok: IDENT, lit: "foo\\bar"},
		{s: `"foo\bar"`, tok: BADESCAPE, lit: `\b`, pos: Pos{Line: 0, Char: 5}},
		{s: `"foo\"bar\""`, tok: IDENT, lit: `foo"bar"`},
		{s: `test"`, tok: BADSTRING, lit: "", pos: Pos{Line: 0, Char: 3}},
		{s: `"test`, tok: BADSTRING, lit: "test"},
		{s: "?", tok: ILLEGAL, lit: "?"},
		{s: "$10", tok: POSITIONALPARAM, lit: "$10"},
		{s: `"testing 123!"`, tok: IDENT, lit: `testing 123!`},

		// Booleans
		{s: `true`, tok: TRUE},
		{s: `false`, tok: FALSE},

		// Null
		{s: `null`, tok: NULL},
		{s: `NULL`, tok: NULL},

		// Strings
		{s: `'testing 123!'`, tok: STRING, lit: `testing 123!`},
		{s: `'foo\nbar'`, tok: STRING, lit: "foo\nbar"},
		{s: `'foo\\bar'`, tok: STRING, lit: "foo\\bar"},
		{s: `'test`, tok: BADSTRING, lit: `test`},
		{s: `'test\g'`, tok: BADESCAPE, lit: `\g`, pos: Pos{Line: 0, Char: 6}},
		{s: `'test`, tok: BADSTRING, lit: `test`},
		{s: `'test\nfoo`, tok: BADSTRING, lit: "test\nfoo"},
		{s: `'test\g'`, tok: BADESCAPE, lit: `\g`, pos: Pos{Line: 0, Char: 6}},

		// Numbers
		{s: `100`, tok: INTEGER, lit: `100`},
		{s: `100.23`, tok: NUMBER, lit: `100.23`},
		{s: `.23`, tok: NUMBER, lit: `.23`},
		{s: `10.3s`, tok: NUMBER, lit: `10.3`},
		{s: `1.2e10`, tok: NUMBER, lit: `1.2e10`},
		{s: `1.2E10`, tok: NUMBER, lit: `1.2E10`},
		{s: `1.2e+10`, tok: NUMBER, lit: `1.2e+10`},
		{s: `1.2e-10`, tok: NUMBER, lit: `1.2e-10`},

		// Keywords
		{s: `ADD`, tok: ADD_KEYWORD},
		{s: `ALTER`, tok: ALTER},
		{s: `AS`, tok: AS},
		{s: `ASC`, tok: ASC},
		{s: `ALL`, tok: ALL},
		{s: `BY`, tok: BY},
		{s: `BEGIN`, tok: BEGIN},
		{s: `BETWEEN`, tok: BETWEEN},
		{s: `CACHE`, tok: CACHE},
		{s: `CAST`, tok: CAST},
		{s: `CHECK`, tok: CHECK},
		{s: `COMMIT`, tok: COMMIT},
		{s: `CONFLICT`, tok: CONFLICT},
		{s: `CONSTRAINT`, tok: CONSTRAINT},
		{s: `CREATE`, tok: CREATE},
		{s: `CYCLE`, tok: CYCLE},
		{s: `DEFAULT`, tok: DEFAULT},
		{s: `DELETE`, tok: DELETE},
		{s: `DESC`, tok: DESC},
		{s: `DO`, tok: DO},
		{s: `DISTINCT`, tok: DISTINCT},
		{s: `DROP`, tok: DROP},
		{s: `EXPLAIN`, tok: EXPLAIN},
		{s: `GROUP`, tok: GROUP},
		{s: `COLUMN`, tok: COLUMN},
		{s: `FOR`, tok: FOR},
		{s: `FROM`, tok: FROM},
		{s: `IGNORE`, tok: IGNORE},
		{s: `INCREMENT`, tok: INCREMENT},
		{s: `INDEX`, tok: INDEX},
		{s: `INSERT`, tok: INSERT},
		{s: `INTO`, tok: INTO},
		{s: `LIMIT`, tok: LIMIT},
		{s: `MAXVALUE`, tok: MAXVALUE},
		{s: `MINVALUE`, tok: MINVALUE},
		{s: `NO`, tok: NO},
		{s: `NOT`, tok: NOT},
		{s: `NOTHING`, tok: NOTHING},
		{s: `ONLY`, tok: ONLY},
		{s: `OFFSET`, tok: OFFSET},
		{s: `ORDER`, tok: ORDER},
		{s: `PRIMARY`, tok: PRIMARY},
		{s: `READ`, tok: READ},
		{s: `REINDEX`, tok: REINDEX},
		{s: `RENAME`, tok: RENAME},
		{s: `REPLACE`, tok: REPLACE},
		{s: `RETURNING`, tok: RETURNING},
		{s: `ROLLBACK`, tok: ROLLBACK},
		{s: `SELECT`, tok: SELECT},
		{s: `SEQUENCE`, tok: SEQUENCE},
		{s: `SET`, tok: SET},
		{s: `START`, tok: START},
		{s: `TABLE`, tok: TABLE},
		{s: `TO`, tok: TO},
		{s: `TRANSACTION`, tok: TRANSACTION},
		{s: `UPDATE`, tok: UPDATE},
		{s: `UNION`, tok: UNION},
		{s: `VALUES`, tok: VALUES},
		{s: `WITH`, tok: WITH},
		{s: `WHERE`, tok: WHERE},
		{s: `WRITE`, tok: WRITE},
		{s: `seLECT`, tok: SELECT}, // case insensitive

		// types
		{s: "BYTES", tok: TYPEBYTES},
		{s: "BOOL", tok: TYPEBOOL},
		{s: "BOOLEAN", tok: TYPEBOOLEAN},
		{s: "DOUBLE", tok: TYPEDOUBLE},
		{s: "INTEGER", tok: TYPEINTEGER},
		{s: "TEXT", tok: TYPETEXT},
		{s: "TIMESTAMP", tok: TYPETIMESTAMP},
	}

	for idx, tc := range cases {
		sc := NewScanner(strings.NewReader(tc.s))
		gotTok, gotPos, gotLit := sc.Scan()

		// Report only the first discrepancy per case, checked in the order
		// token, position, literal.
		switch {
		case tc.tok != gotTok:
			t.Errorf("%d. %q token mismatch: exp=%q got=%q <%q>", idx, tc.s, tc.tok, gotTok, gotLit)
		case tc.pos.Line != gotPos.Line || tc.pos.Char != gotPos.Char:
			t.Errorf("%d. %q pos mismatch: exp=%#v got=%#v", idx, tc.s, tc.pos, gotPos)
		case tc.lit != gotLit:
			t.Errorf("%d. %q literal mismatch: exp=%q got=%q", idx, tc.s, tc.lit, gotLit)
		}
	}
}
// TestScanner_Scan_Multi scans a complete statement one token at a time and
// compares the resulting sequence (type, position and literal of every token,
// EOF included) with the expected sequence.
func TestScanner_Scan_Multi(t *testing.T) {
	type result struct {
		tok Token
		pos Pos
		lit string
	}

	const query = `SELECT val from my_table WHERE a = 'b'`

	want := []result{
		{tok: SELECT, pos: Pos{Line: 0, Char: 0}, lit: ""},
		{tok: WS, pos: Pos{Line: 0, Char: 6}, lit: " "},
		{tok: IDENT, pos: Pos{Line: 0, Char: 7}, lit: "val"},
		{tok: WS, pos: Pos{Line: 0, Char: 10}, lit: " "},
		{tok: FROM, pos: Pos{Line: 0, Char: 11}, lit: ""},
		{tok: WS, pos: Pos{Line: 0, Char: 15}, lit: " "},
		{tok: IDENT, pos: Pos{Line: 0, Char: 16}, lit: "my_table"},
		{tok: WS, pos: Pos{Line: 0, Char: 24}, lit: " "},
		{tok: WHERE, pos: Pos{Line: 0, Char: 25}, lit: ""},
		{tok: WS, pos: Pos{Line: 0, Char: 30}, lit: " "},
		{tok: IDENT, pos: Pos{Line: 0, Char: 31}, lit: "a"},
		{tok: WS, pos: Pos{Line: 0, Char: 32}, lit: " "},
		{tok: EQ, pos: Pos{Line: 0, Char: 33}, lit: ""},
		{tok: WS, pos: Pos{Line: 0, Char: 34}, lit: " "},
		{tok: STRING, pos: Pos{Line: 0, Char: 34}, lit: "b"},
		{tok: EOF, pos: Pos{Line: 0, Char: 38}, lit: ""},
	}

	// Drain the scanner; the EOF token itself is recorded before stopping.
	sc := newScanner(strings.NewReader(query))
	var got []result
	for atEnd := false; !atEnd; {
		tok, pos, lit := sc.Scan()
		got = append(got, result{tok: tok, pos: pos, lit: lit})
		atEnd = tok == EOF
	}

	// The lengths must agree before the element-wise comparison below indexes
	// into got.
	if len(want) != len(got) {
		t.Fatalf("token count mismatch: exp=%d, got=%d", len(want), len(got))
	}

	// Stop at the first token that differs.
	for i, w := range want {
		if !reflect.DeepEqual(w, got[i]) {
			t.Fatalf("%d. token mismatch:\n\nexp=%#v\n\ngot=%#v", i, w, got[i])
		}
	}
}
// TestScanString runs scanString over quoted inputs and checks both the
// returned text and the error message. An empty err field means no error is
// expected; the returned text is compared only when the error matches.
func TestScanString(t *testing.T) {
	var tests = []struct {
		in  string
		out string
		err string
	}{
		{in: `''`, out: ``},
		{in: `'foo bar'`, out: `foo bar`},
		{in: `'foo\nbar'`, out: "foo\nbar"},
		{in: `'foo\rbar'`, out: "foo\rbar"},
		{in: `'foo\tbar'`, out: "foo\tbar"},
		{in: `'foo\r\nbar'`, out: "foo\r\nbar"},
		{in: `'foo\r\nbar\r\n\trm'`, out: "foo\r\nbar\r\n\trm"},
		{in: `'foo\\bar'`, out: `foo\bar`},
		{in: `'foo\"bar'`, out: `foo"bar`},
		{in: `'\xAF'`, out: `\xAF`},
		{in: `"foo` + "\n", out: `foo`, err: "bad string"}, // newline in string
		{in: `"foo`, out: `foo`, err: "bad string"},        // unclosed quotes
		{in: `"foo\xbar"`, out: `foo\xbar`, err: ""},       // preserved hex escape sequence
	}

	for i, tt := range tests {
		out, err := scanString(strings.NewReader(tt.in))

		// Convert the error once so that a nil error is both compared and
		// reported as "" instead of being rendered by fmt as "%!s(<nil>)".
		gotErr := errstring(err)

		// %q keeps control characters (\n, \r, \t) and empty strings visible
		// in the failure output.
		if tt.err != gotErr {
			t.Errorf("%d. %q: error: exp=%q, got=%q", i, tt.in, tt.err, gotErr)
		} else if tt.out != out {
			t.Errorf("%d. %q: out: exp=%q, got=%q", i, tt.in, tt.out, out)
		}
	}
}
// errstring returns err.Error(), or the empty string when err is nil, so that
// tests can compare an error against an expected message with plain string
// equality.
func errstring(err error) string {
	if err == nil {
		return ""
	}
	return err.Error()
}
// TestScanRegex checks ScanRegex on slash-delimited inputs: every case
// expects a REGEX token plus a specific literal. Per the table, an escaped
// slash (`\/`) is expected back as a bare `/`, while other backslash
// sequences are expected unchanged.
func TestScanRegex(t *testing.T) {
	cases := []struct {
		in  string
		tok Token
		lit string
	}{
		{in: `/^payments\./`, tok: REGEX, lit: `^payments\.`},
		{in: `/foo\/bar/`, tok: REGEX, lit: `foo/bar`},
		{in: `/foo\\/bar/`, tok: REGEX, lit: `foo\/bar`},
		{in: `/foo\\bar/`, tok: REGEX, lit: `foo\\bar`},
		{in: `/http\:\/\/www\.example\.com/`, tok: REGEX, lit: `http\://www\.example\.com`},
	}

	for n, c := range cases {
		sc := newScanner(strings.NewReader(c.in))
		gotTok, _, gotLit := sc.ScanRegex()

		// Token and literal are checked independently, so a single case can
		// report both failures.
		if gotTok != c.tok {
			t.Errorf("%d. %s: error:\n\texp=%s\n\tgot=%s\n", n, c.in, c.tok.String(), gotTok.String())
		}
		if gotLit != c.lit {
			t.Errorf("%d. %s: error:\n\texp=%s\n\tgot=%s\n", n, c.in, c.lit, gotLit)
		}
	}
}