feat: upgrade client-go version to v0.29.0 (#109)

* feat: upgrade client-go version to v0.29.0

* feat: upgrade coredns version

* chore: update README.md
This commit is contained in:
naison
2024-01-01 16:45:54 +08:00
committed by GitHub
parent c3c6864b47
commit a66fbb1637
3855 changed files with 366863 additions and 114884 deletions

21
vendor/github.com/DataDog/go-sqllexer/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,21 @@
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Go workspace file
go.work

21
vendor/github.com/DataDog/go-sqllexer/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Datadog, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

90
vendor/github.com/DataDog/go-sqllexer/README.md generated vendored Normal file
View File

@@ -0,0 +1,90 @@
# go-sqllexer
This repository contains a hand-written SQL lexer that tokenizes SQL queries with a focus on obfuscation and normalization. The lexer is written in Go with no external dependencies.
**Note** This is NOT a SQL parser, it only tokenizes SQL queries.
## Features
- :rocket: Fast and lightweight tokenization (not regex based)
- :lock: Obfuscates sensitive data (e.g. numbers, strings, specific literals like dollar quoted strings in Postgres, etc.)
- :book: Even works with truncated queries
- :globe_with_meridians: UTF-8 support
- :wrench: Normalizes obfuscated queries
## Installation
```bash
go get github.com/DataDog/go-sqllexer
```
## Usage
### Tokenize
```go
import (
    "fmt"

    "github.com/DataDog/go-sqllexer"
)

func main() {
    query := "SELECT * FROM users WHERE id = 1"
    lexer := sqllexer.New(query)
    tokens := lexer.ScanAll()
    for _, token := range tokens {
        fmt.Println(token)
    }
}
```
### Obfuscate
```go
import (
"fmt"
"github.com/DataDog/go-sqllexer"
)
func main() {
query := "SELECT * FROM users WHERE id = 1"
obfuscator := sqllexer.NewObfuscator()
obfuscated := obfuscator.Obfuscate(query)
// "SELECT * FROM users WHERE id = ?"
fmt.Println(obfuscated)
}
```
### Normalize
```go
import (
"fmt"
"github.com/DataDog/go-sqllexer"
)
func main() {
    query := "SELECT * FROM users WHERE id in (?, ?)"
    normalizer := sqllexer.NewNormalizer(
        sqllexer.WithCollectComments(true),
        sqllexer.WithCollectCommands(true),
        sqllexer.WithCollectTables(true),
        sqllexer.WithKeepSQLAlias(false),
    )
    normalized, statementMetadata, err := normalizer.Normalize(query)
    // "SELECT * FROM users WHERE id in (?)"
    fmt.Println(normalized)
}
```
## Testing
```bash
go test -v ./...
```
## Benchmarks
```bash
go test -bench=. -benchmem ./...
```
## License
[MIT License](LICENSE)

343
vendor/github.com/DataDog/go-sqllexer/normalizer.go generated vendored Normal file
View File

@@ -0,0 +1,343 @@
package sqllexer
import (
"strings"
)
// normalizerConfig controls which metadata the Normalizer collects and how
// the normalized SQL text is formatted. The zero value disables all options.
type normalizerConfig struct {
	// CollectTables specifies whether the normalizer should also extract the table names that a query addresses
	CollectTables bool `json:"collect_tables"`
	// CollectCommands specifies whether the normalizer should extract and return commands as SQL metadata
	CollectCommands bool `json:"collect_commands"`
	// CollectComments specifies whether the normalizer should extract and return comments as SQL metadata
	CollectComments bool `json:"collect_comments"`
	// CollectProcedure specifies whether the normalizer should extract and return procedure name as SQL metadata
	CollectProcedure bool `json:"collect_procedure"`
	// KeepSQLAlias specifies whether SQL aliases ("AS") should be truncated.
	KeepSQLAlias bool `json:"keep_sql_alias"`
	// UppercaseKeywords specifies whether SQL keywords should be uppercased.
	UppercaseKeywords bool `json:"uppercase_keywords"`
	// RemoveSpaceBetweenParentheses specifies whether spaces should be kept between parentheses.
	// Spaces are inserted between parentheses by default. but this can be disabled by setting this to true.
	RemoveSpaceBetweenParentheses bool `json:"remove_space_between_parentheses"`
	// KeepTrailingSemicolon specifies whether the normalizer should keep the trailing semicolon.
	// The trailing semicolon is removed by default, but this can be disabled by setting this to true.
	// PL/SQL requires a trailing semicolon, so this should be set to true when normalizing PL/SQL.
	KeepTrailingSemicolon bool `json:"keep_trailing_semicolon"`
	// KeepIdentifierQuotation specifies whether the normalizer should keep the quotation of identifiers.
	KeepIdentifierQuotation bool `json:"keep_identifier_quotation"`
}
// normalizerOption is a functional option that mutates a normalizerConfig.
type normalizerOption func(*normalizerConfig)

// WithCollectTables toggles collection of table names into StatementMetadata.
func WithCollectTables(collectTables bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.CollectTables = collectTables
	}
}

// WithCollectCommands toggles collection of SQL commands (SELECT, INSERT, ...).
func WithCollectCommands(collectCommands bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.CollectCommands = collectCommands
	}
}

// WithCollectComments toggles collection of SQL comments.
func WithCollectComments(collectComments bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.CollectComments = collectComments
	}
}

// WithKeepSQLAlias toggles whether "AS alias" clauses are kept in the output.
func WithKeepSQLAlias(keepSQLAlias bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.KeepSQLAlias = keepSQLAlias
	}
}

// WithUppercaseKeywords toggles uppercasing of recognized SQL keywords.
func WithUppercaseKeywords(uppercaseKeywords bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.UppercaseKeywords = uppercaseKeywords
	}
}

// WithCollectProcedures toggles collection of procedure names.
func WithCollectProcedures(collectProcedure bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.CollectProcedure = collectProcedure
	}
}

// WithRemoveSpaceBetweenParentheses suppresses the space normally inserted
// just inside parentheses and brackets.
func WithRemoveSpaceBetweenParentheses(removeSpaceBetweenParentheses bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.RemoveSpaceBetweenParentheses = removeSpaceBetweenParentheses
	}
}

// WithKeepTrailingSemicolon keeps the trailing semicolon (needed for PL/SQL).
func WithKeepTrailingSemicolon(keepTrailingSemicolon bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.KeepTrailingSemicolon = keepTrailingSemicolon
	}
}

// WithKeepIdentifierQuotation keeps quotation marks around quoted identifiers.
func WithKeepIdentifierQuotation(keepIdentifierQuotation bool) normalizerOption {
	return func(c *normalizerConfig) {
		c.KeepIdentifierQuotation = keepIdentifierQuotation
	}
}
// StatementMetadata is the metadata collected while normalizing a statement.
type StatementMetadata struct {
	// Size is the total byte length of the deduplicated metadata entries.
	Size int `json:"size"`
	Tables []string `json:"tables"`
	Comments []string `json:"comments"`
	Commands []string `json:"commands"`
	Procedures []string `json:"procedures"`
}

// groupablePlaceholder tracks whether the normalizer is currently inside a
// run of obfuscated values (e.g. "(?, ?, ?)") that should collapse to one "?".
type groupablePlaceholder struct {
	groupable bool
}

// Normalizer produces a compact, normalized form of SQL statements.
type Normalizer struct {
	config *normalizerConfig
}

// NewNormalizer creates a Normalizer, applying the given options to a
// zero-valued configuration.
func NewNormalizer(opts ...normalizerOption) *Normalizer {
	normalizer := Normalizer{
		config: &normalizerConfig{},
	}
	for _, opt := range opts {
		opt(normalizer.config)
	}
	return &normalizer
}
// Normalize takes an input SQL string and returns a normalized SQL string, a StatementMetadata struct, and an error.
// The normalizer collapses input SQL into compact format, groups obfuscated values into single placeholder,
// and collects metadata such as table names, comments, and commands.
func (n *Normalizer) Normalize(input string, lexerOpts ...lexerOption) (normalizedSQL string, statementMetadata *StatementMetadata, err error) {
	lexer := New(
		input,
		lexerOpts...,
	)
	var normalizedSQLBuilder strings.Builder
	// Slices start non-nil so the JSON-tagged fields encode as [] rather than null.
	statementMetadata = &StatementMetadata{
		Tables:     []string{},
		Comments:   []string{},
		Commands:   []string{},
		Procedures: []string{},
	}
	var lastToken Token // The last token that is not whitespace or comment
	var groupablePlaceholder groupablePlaceholder
	for {
		token := lexer.Scan()
		if token.Type == EOF {
			break
		}
		// Metadata collection and normalization happen in a single pass over the tokens.
		n.collectMetadata(&token, &lastToken, statementMetadata)
		n.normalizeSQL(&token, &lastToken, &normalizedSQLBuilder, &groupablePlaceholder, lexerOpts...)
	}
	normalizedSQL = normalizedSQLBuilder.String()
	// Dedupe collected metadata
	dedupeStatementMetadata(statementMetadata)
	return n.trimNormalizedSQL(normalizedSQL), statementMetadata, nil
}
// collectMetadata records comments, commands, table names and procedure names
// from the current token into statementMetadata, according to the enabled
// Collect* options. It may also rewrite token.Value in place to strip
// identifier quotes when KeepIdentifierQuotation is disabled.
func (n *Normalizer) collectMetadata(token *Token, lastToken *Token, statementMetadata *StatementMetadata) {
	if n.config.CollectComments && (token.Type == COMMENT || token.Type == MULTILINE_COMMENT) {
		// Collect comments
		statementMetadata.Comments = append(statementMetadata.Comments, token.Value)
	} else if token.Type == IDENT || token.Type == QUOTED_IDENT || token.Type == FUNCTION {
		tokenVal := token.Value
		if token.Type == QUOTED_IDENT {
			// We always want to trim the quotes for collected metadata such as table names
			// This is because the metadata is used as tags, and we don't want them to be normalized as underscores later on
			tokenVal = trimQuotes(tokenVal, tokenVal[0:1], tokenVal[len(tokenVal)-1:])
			if !n.config.KeepIdentifierQuotation {
				token.Value = tokenVal
			}
		}
		if n.config.CollectCommands && isCommand(strings.ToUpper(tokenVal)) {
			// Collect commands
			statementMetadata.Commands = append(statementMetadata.Commands, strings.ToUpper(tokenVal))
		} else if n.config.CollectTables && isTableIndicator(strings.ToUpper(lastToken.Value)) && !isSQLKeyword(token) {
			// Collect table names: an identifier that follows a table indicator
			// (FROM, JOIN, INTO, ...) and is not itself a keyword
			statementMetadata.Tables = append(statementMetadata.Tables, tokenVal)
		} else if n.config.CollectProcedure && isProcedure(lastToken) {
			// Collect procedure names
			statementMetadata.Procedures = append(statementMetadata.Procedures, tokenVal)
		}
	}
}
// normalizeSQL writes the normalized form of the current token to
// normalizedSQLBuilder, handling dollar-quoted function bodies, alias
// discarding, placeholder grouping and whitespace insertion. Whitespace and
// comment tokens are dropped entirely. lastToken is updated in place and the
// statement order here is load-bearing: alias handling must run before
// grouping, which must run before the token is written.
func (n *Normalizer) normalizeSQL(token *Token, lastToken *Token, normalizedSQLBuilder *strings.Builder, groupablePlaceholder *groupablePlaceholder, lexerOpts ...lexerOption) {
	if token.Type != WS && token.Type != COMMENT && token.Type != MULTILINE_COMMENT {
		if token.Type == DOLLAR_QUOTED_FUNCTION && token.Value != StringPlaceholder {
			// if the token is a dollar quoted function and it is not obfuscated,
			// we need to recursively normalize the content of the dollar quoted function
			quotedFunc := token.Value[6 : len(token.Value)-6] // remove the $func$ prefix and suffix
			normalizedQuotedFunc, _, err := n.Normalize(quotedFunc, lexerOpts...)
			if err == nil {
				// replace the content of the dollar quoted function with the normalized content
				// if there is an error, we just keep the original content
				var normalizedDollarQuotedFunc strings.Builder
				normalizedDollarQuotedFunc.WriteString("$func$")
				normalizedDollarQuotedFunc.WriteString(normalizedQuotedFunc)
				normalizedDollarQuotedFunc.WriteString("$func$")
				token.Value = normalizedDollarQuotedFunc.String()
			}
		}
		if !n.config.KeepSQLAlias {
			// discard SQL alias
			if strings.ToUpper(token.Value) == "AS" {
				// if current token is AS, then continue to next token
				// because without seeing the next token, we cannot
				// determine if the current token is an alias or not
				*lastToken = *token
				return
			}
			if strings.ToUpper(lastToken.Value) == "AS" {
				if token.Type == IDENT && !isSQLKeyword(token) {
					// if the last token is AS and the current token is IDENT,
					// then the current token is an alias, so we discard it
					*lastToken = *token
					return
				} else {
					// if the last token is AS and the current token is not IDENT,
					// this could be a CTE like WITH ... AS (...),
					// so we do not discard the current token
					n.appendWhitespace(lastToken, token, normalizedSQLBuilder)
					n.writeToken(lastToken, normalizedSQLBuilder)
				}
			}
		}
		// group consecutive obfuscated values into single placeholder
		if n.isObfuscatedValueGroupable(token, lastToken, groupablePlaceholder, normalizedSQLBuilder) {
			// return the token but not write it to the normalizedSQLBuilder
			*lastToken = *token
			return
		}
		// determine if we should add a whitespace
		n.appendWhitespace(lastToken, token, normalizedSQLBuilder)
		n.writeToken(token, normalizedSQLBuilder)
		*lastToken = *token
	}
}
// writeToken emits a single token's value into the normalized SQL builder,
// uppercasing it when the UppercaseKeywords option is set and the token is a
// recognized SQL keyword.
func (n *Normalizer) writeToken(token *Token, normalizedSQLBuilder *strings.Builder) {
	value := token.Value
	if n.config.UppercaseKeywords && isSQLKeyword(token) {
		value = strings.ToUpper(value)
	}
	normalizedSQLBuilder.WriteString(value)
}
// isObfuscatedValueGroupable reports whether the current token is part of a
// run of consecutive obfuscated placeholders — e.g. "(?, ?, ?)" — that should
// be collapsed into a single "?". A true return means the caller should skip
// writing the token. The groupable flag on groupablePlaceholder carries the
// "inside a run" state between calls.
func (n *Normalizer) isObfuscatedValueGroupable(token *Token, lastToken *Token, groupablePlaceholder *groupablePlaceholder, normalizedSQLBuilder *strings.Builder) bool {
	if token.Value == NumberPlaceholder || token.Value == StringPlaceholder {
		if lastToken.Value == "(" || lastToken.Value == "[" {
			// if the last token is "(" or "[", and the current token is a placeholder,
			// we know it's the start of groupable placeholders
			// we don't return here because we still need to write the first placeholder
			groupablePlaceholder.groupable = true
		} else if lastToken.Value == "," && groupablePlaceholder.groupable {
			// a placeholder following a comma inside a groupable run is skipped
			return true
		}
	}
	if (lastToken.Value == NumberPlaceholder || lastToken.Value == StringPlaceholder) && token.Value == "," && groupablePlaceholder.groupable {
		// the comma after a grouped placeholder is skipped too
		return true
	}
	if groupablePlaceholder.groupable && (token.Value == ")" || token.Value == "]") {
		// end of groupable placeholders
		groupablePlaceholder.groupable = false
		return false
	}
	if groupablePlaceholder.groupable && token.Value != NumberPlaceholder && token.Value != StringPlaceholder && lastToken.Value == "," {
		// This is a tricky edge case. If we are inside a groupable block, and the current token is not a placeholder,
		// we not only want to write the current token to the normalizedSQLBuilder, but also write the last comma that we skipped.
		// For example, (?, ARRAY[?, ?, ?]) should be normalized as (?, ARRAY[?])
		normalizedSQLBuilder.WriteString(lastToken.Value)
		return false
	}
	return false
}
// appendWhitespace writes the single space that separates the previous token
// from the current one, except where no separator is wanted: before "," and
// ";", before "=" when immediately preceded by ":", and (optionally) just
// inside parentheses/brackets when RemoveSpaceBetweenParentheses is set.
func (n *Normalizer) appendWhitespace(lastToken *Token, token *Token, normalizedSQLBuilder *strings.Builder) {
	// do not add a space between parentheses if RemoveSpaceBetweenParentheses is true
	if n.config.RemoveSpaceBetweenParentheses && (lastToken.Type == FUNCTION || lastToken.Value == "(" || lastToken.Value == "[") {
		return
	}
	if n.config.RemoveSpaceBetweenParentheses && (token.Value == ")" || token.Value == "]") {
		return
	}
	switch token.Value {
	case ",":
		// no space before a comma (empty case intentionally breaks out)
	case ";":
		// no space before a semicolon
	case "=":
		if lastToken.Value == ":" {
			// do not add a space before an equals if a colon was
			// present before it.
			break
		}
		fallthrough
	default:
		normalizedSQLBuilder.WriteString(" ")
	}
}
// trimNormalizedSQL strips surrounding whitespace from the normalized SQL
// and, unless KeepTrailingSemicolon is set, drops a single trailing semicolon.
func (n *Normalizer) trimNormalizedSQL(normalizedSQL string) string {
	if n.config.KeepTrailingSemicolon {
		return strings.TrimSpace(normalizedSQL)
	}
	// Remove trailing semicolon (default behavior)
	return strings.TrimSpace(strings.TrimSuffix(normalizedSQL, ";"))
}
// dedupeCollectedMetadata returns metadata with duplicates removed (first
// occurrence wins, original order otherwise preserved) together with the total
// byte length of the kept entries.
// e.g. [SELECT, JOIN, SELECT, JOIN] -> [SELECT, JOIN], size 10
func dedupeCollectedMetadata(metadata []string) (dedupedMetadata []string, size int) {
	seen := make(map[string]struct{}, len(metadata))
	dedupedMetadata = []string{} // kept non-nil so JSON-tagged callers encode [] rather than null
	for _, entry := range metadata {
		if _, dup := seen[entry]; dup {
			continue
		}
		seen[entry] = struct{}{}
		size += len(entry)
		dedupedMetadata = append(dedupedMetadata, entry)
	}
	return dedupedMetadata, size
}
// dedupeStatementMetadata removes duplicates from every metadata slice on info
// and adds the combined byte size of the remaining entries to info.Size.
func dedupeStatementMetadata(info *StatementMetadata) {
	total := 0
	for _, field := range []*[]string{&info.Tables, &info.Comments, &info.Commands, &info.Procedures} {
		deduped, n := dedupeCollectedMetadata(*field)
		*field = deduped
		total += n
	}
	info.Size += total
}

View File

@@ -0,0 +1,41 @@
package sqllexer
import "strings"
// ObfuscateAndNormalize takes an input SQL string and returns a normalized SQL string with metadata
// This function is a convenience function that combines the Obfuscator and Normalizer in one pass
func ObfuscateAndNormalize(input string, obfuscator *Obfuscator, normalizer *Normalizer, lexerOpts ...lexerOption) (normalizedSQL string, statementMetadata *StatementMetadata, err error) {
	lexer := New(
		input,
		lexerOpts...,
	)
	var normalizedSQLBuilder strings.Builder
	// Slices start non-nil so the JSON-tagged fields encode as [] rather than null.
	statementMetadata = &StatementMetadata{
		Tables:     []string{},
		Comments:   []string{},
		Commands:   []string{},
		Procedures: []string{},
	}
	var lastToken Token // The last token that is not whitespace or comment
	var groupablePlaceholder groupablePlaceholder
	for {
		token := lexer.Scan()
		if token.Type == EOF {
			break
		}
		// obfuscate first so metadata collection and normalization see placeholders
		token.Value = obfuscator.ObfuscateTokenValue(token, lexerOpts...)
		normalizer.collectMetadata(&token, &lastToken, statementMetadata)
		normalizer.normalizeSQL(&token, &lastToken, &normalizedSQLBuilder, &groupablePlaceholder, lexerOpts...)
	}
	normalizedSQL = normalizedSQLBuilder.String()
	// Dedupe collected metadata
	dedupeStatementMetadata(statementMetadata)
	return normalizer.trimNormalizedSQL(normalizedSQL), statementMetadata, nil
}

128
vendor/github.com/DataDog/go-sqllexer/obfuscator.go generated vendored Normal file
View File

@@ -0,0 +1,128 @@
package sqllexer
import (
"strings"
)
// obfuscatorConfig controls which token classes the Obfuscator replaces.
// The zero value obfuscates only plain string and number literals.
type obfuscatorConfig struct {
	// DollarQuotedFunc, when set, obfuscates the body of $func$...$func$
	// blocks instead of replacing the whole block with a placeholder.
	DollarQuotedFunc bool `json:"dollar_quoted_func"`
	// ReplaceDigits, when set, also replaces digits inside identifiers.
	ReplaceDigits bool `json:"replace_digits"`
	// ReplacePositionalParameter, when set, replaces $1-style parameters.
	ReplacePositionalParameter bool `json:"replace_positional_parameter"`
	// ReplaceBoolean, when set, replaces boolean literals.
	ReplaceBoolean bool `json:"replace_boolean"`
	// ReplaceNull, when set, replaces NULL literals.
	ReplaceNull bool `json:"replace_null"`
}

// obfuscatorOption is a functional option that mutates an obfuscatorConfig.
type obfuscatorOption func(*obfuscatorConfig)

// WithReplaceDigits toggles replacement of digits embedded in identifiers.
func WithReplaceDigits(replaceDigits bool) obfuscatorOption {
	return func(c *obfuscatorConfig) {
		c.ReplaceDigits = replaceDigits
	}
}

// WithReplacePositionalParameter toggles replacement of positional parameters.
func WithReplacePositionalParameter(replacePositionalParameter bool) obfuscatorOption {
	return func(c *obfuscatorConfig) {
		c.ReplacePositionalParameter = replacePositionalParameter
	}
}

// WithReplaceBoolean toggles replacement of boolean literals.
func WithReplaceBoolean(replaceBoolean bool) obfuscatorOption {
	return func(c *obfuscatorConfig) {
		c.ReplaceBoolean = replaceBoolean
	}
}

// WithReplaceNull toggles replacement of NULL literals.
func WithReplaceNull(replaceNull bool) obfuscatorOption {
	return func(c *obfuscatorConfig) {
		c.ReplaceNull = replaceNull
	}
}

// WithDollarQuotedFunc toggles obfuscating inside $func$ bodies rather than
// replacing them wholesale.
func WithDollarQuotedFunc(dollarQuotedFunc bool) obfuscatorOption {
	return func(c *obfuscatorConfig) {
		c.DollarQuotedFunc = dollarQuotedFunc
	}
}
// Obfuscator replaces literal values in SQL statements with placeholders.
type Obfuscator struct {
	config *obfuscatorConfig
}

// NewObfuscator creates an Obfuscator, applying the given options to a
// zero-valued configuration.
func NewObfuscator(opts ...obfuscatorOption) *Obfuscator {
	obfuscator := &Obfuscator{
		config: &obfuscatorConfig{},
	}
	for _, opt := range opts {
		opt(obfuscator.config)
	}
	return obfuscator
}

const (
	// StringPlaceholder and NumberPlaceholder are the replacement text for
	// obfuscated literals; both are currently "?".
	StringPlaceholder = "?"
	NumberPlaceholder = "?"
)
// Obfuscate takes an input SQL string and returns an obfuscated SQL string.
// The obfuscator replaces all literal values with a single placeholder
func (o *Obfuscator) Obfuscate(input string, lexerOpts ...lexerOption) string {
	var obfuscatedSQL strings.Builder
	lexer := New(
		input,
		lexerOpts...,
	)
	for {
		token := lexer.Scan()
		if token.Type == EOF {
			break
		}
		// each token is obfuscated independently; token types not handled by
		// ObfuscateTokenValue (including whitespace) pass through unchanged
		obfuscatedSQL.WriteString(o.ObfuscateTokenValue(token, lexerOpts...))
	}
	return strings.TrimSpace(obfuscatedSQL.String())
}
// ObfuscateTokenValue returns the obfuscated replacement text for a single
// token, honoring the obfuscator's configuration. Token types that carry no
// literal value are returned unchanged.
func (o *Obfuscator) ObfuscateTokenValue(token Token, lexerOpts ...lexerOption) string {
	switch token.Type {
	case NUMBER:
		return NumberPlaceholder
	case DOLLAR_QUOTED_FUNCTION:
		if o.config.DollarQuotedFunc {
			// obfuscate the content of dollar quoted function
			quotedFunc := token.Value[6 : len(token.Value)-6] // remove the $func$ prefix and suffix
			var obfuscatedDollarQuotedFunc strings.Builder
			obfuscatedDollarQuotedFunc.WriteString("$func$")
			obfuscatedDollarQuotedFunc.WriteString(o.Obfuscate(quotedFunc, lexerOpts...))
			obfuscatedDollarQuotedFunc.WriteString("$func$")
			return obfuscatedDollarQuotedFunc.String()
		} else {
			return StringPlaceholder
		}
	case STRING, INCOMPLETE_STRING, DOLLAR_QUOTED_STRING:
		return StringPlaceholder
	case POSITIONAL_PARAMETER:
		if o.config.ReplacePositionalParameter {
			return StringPlaceholder
		} else {
			return token.Value
		}
	case IDENT, QUOTED_IDENT:
		if o.config.ReplaceBoolean && isBoolean(token.Value) {
			return StringPlaceholder
		}
		if o.config.ReplaceNull && isNull(token.Value) {
			return StringPlaceholder
		}
		if o.config.ReplaceDigits {
			// presumably replaces digit runs within the identifier — see replaceDigits
			return replaceDigits(token.Value, NumberPlaceholder)
		} else {
			return token.Value
		}
	default:
		return token.Value
	}
}

482
vendor/github.com/DataDog/go-sqllexer/sqllexer.go generated vendored Normal file
View File

@@ -0,0 +1,482 @@
package sqllexer
import "unicode/utf8"
// TokenType classifies the tokens produced by the Lexer.
type TokenType int

const (
	ERROR TokenType = iota
	EOF
	WS // whitespace
	STRING // string literal
	INCOMPLETE_STRING // incomplete string literal so that we can obfuscate it, e.g. 'abc
	NUMBER // number literal
	IDENT // identifier
	QUOTED_IDENT // quoted identifier
	OPERATOR // operator
	WILDCARD // wildcard *
	COMMENT // comment
	MULTILINE_COMMENT // multiline comment
	PUNCTUATION // punctuation
	DOLLAR_QUOTED_FUNCTION // dollar quoted function
	DOLLAR_QUOTED_STRING // dollar quoted string
	POSITIONAL_PARAMETER // numbered parameter
	BIND_PARAMETER // bind parameter
	FUNCTION // function
	SYSTEM_VARIABLE // system variable
	UNKNOWN // unknown token
)

// Token represents a SQL token with its type and value.
type Token struct {
	Type TokenType
	Value string
}

// LexerConfig carries optional lexer settings; DBMS selects dialect-specific
// scanning behavior.
type LexerConfig struct {
	DBMS DBMSType `json:"dbms,omitempty"`
}

// lexerOption is a functional option that mutates a LexerConfig.
type lexerOption func(*LexerConfig)

// WithDBMS sets the SQL dialect the lexer should assume.
func WithDBMS(dbms DBMSType) lexerOption {
	return func(c *LexerConfig) {
		c.DBMS = dbms
	}
}
// SQL Lexer inspired from Rob Pike's talk on Lexical Scanning in Go
type Lexer struct {
	src string // the input src string
	cursor int // the current position of the cursor
	start int // the start position of the current token
	config *LexerConfig
}

// New creates a Lexer for input, applying any lexer options (e.g. WithDBMS).
func New(input string, opts ...lexerOption) *Lexer {
	lexer := &Lexer{src: input, config: &LexerConfig{}}
	for _, opt := range opts {
		opt(lexer.config)
	}
	return lexer
}

// ScanAll scans the entire input string and returns a slice of tokens.
func (s *Lexer) ScanAll() []Token {
	var tokens []Token
	for {
		token := s.Scan()
		if token.Type == EOF {
			// don't include EOF token in the result
			break
		}
		tokens = append(tokens, token)
	}
	return tokens
}
// ScanAllTokens scans the entire input string and returns a channel of tokens.
// Use this if you want to process the tokens as they are scanned.
// NOTE(review): the producing goroutine only exits after sending every token;
// a consumer that stops receiving early leaves it blocked on the unbuffered
// channel — confirm all callers drain the channel.
func (s *Lexer) ScanAllTokens() <-chan Token {
	tokenCh := make(chan Token)
	go func() {
		defer close(tokenCh)
		for {
			token := s.Scan()
			if token.Type == EOF {
				// don't include EOF token in the result
				break
			}
			tokenCh <- token
		}
	}()
	return tokenCh
}
// Scan scans the next token and returns it.
// Dispatch is driven by the rune at the cursor plus at most one rune of
// lookahead; several branches are DBMS-specific (SQL Server, MySQL, Oracle)
// and fall through to more general handling when the dialect does not match.
func (s *Lexer) Scan() Token {
	ch := s.peek()
	switch {
	case isWhitespace(ch):
		return s.scanWhitespace()
	case isLetter(ch):
		return s.scanIdentifier(ch)
	case isDoubleQuote(ch):
		return s.scanDoubleQuotedIdentifier('"')
	case isSingleQuote(ch):
		return s.scanString()
	case isSingleLineComment(ch, s.lookAhead(1)):
		return s.scanSingleLineComment()
	case isMultiLineComment(ch, s.lookAhead(1)):
		return s.scanMultiLineComment()
	case isLeadingSign(ch):
		// if the leading sign is followed by a digit, then it's a number
		// although this is not strictly true, it's good enough for our purposes
		nextCh := s.lookAhead(1)
		if isDigit(nextCh) || nextCh == '.' {
			return s.scanNumberWithLeadingSign()
		}
		return s.scanOperator(ch)
	case isDigit(ch):
		return s.scanNumber(ch)
	case isWildcard(ch):
		return s.scanWildcard()
	case ch == '$':
		if isDigit(s.lookAhead(1)) {
			// if the dollar sign is followed by a digit, then it's a numbered parameter
			return s.scanPositionalParameter()
		}
		if s.config.DBMS == DBMSSQLServer && isLetter(s.lookAhead(1)) {
			// SQL Server allows $ to start an identifier
			return s.scanIdentifier(ch)
		}
		return s.scanDollarQuotedString()
	case ch == ':':
		if s.config.DBMS == DBMSOracle && isAlphaNumeric(s.lookAhead(1)) {
			// Oracle :name bind parameter
			return s.scanBindParameter()
		}
		return s.scanOperator(ch)
	case ch == '`':
		if s.config.DBMS == DBMSMySQL {
			// MySQL backtick-quoted identifier
			return s.scanDoubleQuotedIdentifier('`')
		}
		fallthrough
	case ch == '#':
		if s.config.DBMS == DBMSSQLServer {
			// SQL Server temporary-object identifiers start with #
			return s.scanIdentifier(ch)
		} else if s.config.DBMS == DBMSMySQL {
			// MySQL treats # as a comment
			return s.scanSingleLineComment()
		}
		fallthrough
	case ch == '@':
		if isAlphaNumeric(s.lookAhead(1)) {
			return s.scanBindParameter()
		} else if s.lookAhead(1) == '@' {
			return s.scanSystemVariable()
		}
		fallthrough
	case isOperator(ch):
		return s.scanOperator(ch)
	case isPunctuation(ch):
		if ch == '[' && s.config.DBMS == DBMSSQLServer {
			// SQL Server [bracketed] identifier
			return s.scanDoubleQuotedIdentifier('[')
		}
		return s.scanPunctuation()
	case isEOF(ch):
		return Token{EOF, ""}
	default:
		return s.scanUnknown()
	}
}
// lookAhead returns the rune n positions ahead of the cursor.
// It returns 0 (NUL) when the position falls outside the source string.
func (s *Lexer) lookAhead(n int) rune {
	if s.cursor+n >= len(s.src) || s.cursor+n < 0 {
		return 0
	}
	r, _ := utf8.DecodeRuneInString(s.src[s.cursor+n:])
	return r
}

// peek returns the rune at the cursor position.
func (s *Lexer) peek() rune {
	return s.lookAhead(0)
}

// nextBy advances the cursor by n positions and returns the rune at the cursor position.
// It returns 0 (NUL) once the cursor would reach or pass the end of the source.
func (s *Lexer) nextBy(n int) rune {
	// advance the cursor by n and return the rune at the cursor position
	if s.cursor+n > len(s.src) {
		return 0
	}
	s.cursor += n
	if s.cursor >= len(s.src) {
		return 0
	}
	r, _ := utf8.DecodeRuneInString(s.src[s.cursor:])
	return r
}

// next advances the cursor by 1 position and returns the rune at the cursor position.
func (s *Lexer) next() rune {
	return s.nextBy(1)
}

// matchAt reports whether the source at the cursor position starts with the
// given runes. NOTE(review): the comparison is byte-wise (s.src[i] != byte(ch)),
// so it is only reliable when match contains ASCII runes — confirm callers
// never pass multi-byte runes (dollar-quote tags could in theory be non-ASCII).
func (s *Lexer) matchAt(match []rune) bool {
	if s.cursor+len(match) > len(s.src) {
		return false
	}
	for i, ch := range match {
		if s.src[s.cursor+i] != byte(ch) {
			return false
		}
	}
	return true
}
// scanNumberWithLeadingSign scans a number literal that starts with a leading
// + or - sign.
func (s *Lexer) scanNumberWithLeadingSign() Token {
	s.start = s.cursor
	ch := s.next() // consume the leading sign
	return s.scanNumeric(ch)
}

// scanNumber scans a number literal starting at the current cursor position.
func (s *Lexer) scanNumber(ch rune) Token {
	s.start = s.cursor
	return s.scanNumeric(ch)
}

// scanNumeric dispatches to the hex, octal or decimal scanner based on the
// literal's prefix: 0x/0X selects hex, a leading 0 followed by an octal digit
// selects octal, anything else decimal.
// (Renamed from the misspelled "scanNumberic"; its only callers are the two
// wrappers above.)
func (s *Lexer) scanNumeric(ch rune) Token {
	if ch == '0' {
		nextCh := s.lookAhead(1)
		if nextCh == 'x' || nextCh == 'X' {
			return s.scanHexNumber()
		} else if nextCh >= '0' && nextCh <= '7' {
			return s.scanOctalNumber()
		}
	}
	return s.scanDecimalNumber()
}

// scanDecimalNumber scans digits, decimal points and an optional exponent
// with optional sign, e.g. 123, 1.5, 1e-3.
func (s *Lexer) scanDecimalNumber() Token {
	ch := s.next()
	// scan digits
	for isDigit(ch) || ch == '.' || isExpontent(ch) {
		if isExpontent(ch) {
			ch = s.next()
			if isLeadingSign(ch) {
				// exponent may carry its own sign, e.g. 1e-3
				ch = s.next()
			}
		} else {
			ch = s.next()
		}
	}
	return Token{NUMBER, s.src[s.start:s.cursor]}
}

// scanHexNumber scans a hexadecimal literal after its 0x/0X prefix.
func (s *Lexer) scanHexNumber() Token {
	ch := s.nextBy(2) // consume the leading 0x
	for isDigit(ch) || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F') {
		ch = s.next()
	}
	return Token{NUMBER, s.src[s.start:s.cursor]}
}

// scanOctalNumber scans an octal literal after its leading 0 and first digit.
func (s *Lexer) scanOctalNumber() Token {
	ch := s.nextBy(2) // consume the leading 0 and number
	for '0' <= ch && ch <= '7' {
		ch = s.next()
	}
	return Token{NUMBER, s.src[s.start:s.cursor]}
}
// scanString scans a single-quoted string literal, honoring backslash
// escapes. A string cut off before its closing quote (e.g. a truncated query)
// is returned as INCOMPLETE_STRING so it can still be obfuscated.
func (s *Lexer) scanString() Token {
	s.start = s.cursor
	ch := s.next() // consume the opening quote
	escaped := false
	for {
		if escaped {
			// encountered an escape character
			// reset the escaped flag and continue
			escaped = false
			ch = s.next()
			continue
		}
		if ch == '\\' {
			escaped = true
			ch = s.next()
			continue
		}
		if ch == '\'' {
			s.next() // consume the closing quote
			return Token{STRING, s.src[s.start:s.cursor]}
		}
		if isEOF(ch) {
			// encountered EOF before closing quote
			// this usually happens when the string is truncated
			return Token{INCOMPLETE_STRING, s.src[s.start:s.cursor]}
		}
		ch = s.next()
	}
}
// scanIdentifier scans an unquoted identifier (letters, digits and the
// characters . ? $ #). An identifier immediately followed by "(" is reported
// as FUNCTION instead of IDENT.
func (s *Lexer) scanIdentifier(ch rune) Token {
	// NOTE: this func does not distinguish between SQL keywords and identifiers
	s.start = s.cursor
	ch = s.nextBy(utf8.RuneLen(ch))
	for isLetter(ch) || isDigit(ch) || ch == '.' || ch == '?' || ch == '$' || ch == '#' {
		ch = s.nextBy(utf8.RuneLen(ch))
	}
	if ch == '(' {
		// if the identifier is followed by a (, then it's a function
		return Token{FUNCTION, s.src[s.start:s.cursor]}
	}
	return Token{IDENT, s.src[s.start:s.cursor]}
}

// scanDoubleQuotedIdentifier scans a delimited identifier: "..." by default,
// `...` for MySQL, or [...] for SQL Server (where the closing delimiter is
// "]"). Qualified names like "foo"."bar" are consumed as one token. An
// unterminated identifier yields ERROR.
func (s *Lexer) scanDoubleQuotedIdentifier(delimiter rune) Token {
	closingDelimiter := delimiter
	if delimiter == '[' {
		closingDelimiter = ']'
	}
	s.start = s.cursor
	ch := s.next() // consume the opening quote
	for {
		// encountered the closing quote
		// BUT if it's followed by .", then we should keep going
		// e.g. postgre "foo"."bar"
		// e.g. sqlserver [foo].[bar]
		if ch == closingDelimiter {
			specialCase := []rune{closingDelimiter, '.', delimiter}
			if s.matchAt([]rune(specialCase)) {
				ch = s.nextBy(3) // consume the "."
				continue
			}
			break
		}
		if isEOF(ch) {
			// unterminated quoted identifier
			return Token{ERROR, s.src[s.start:s.cursor]}
		}
		ch = s.next()
	}
	s.next() // consume the closing quote
	return Token{QUOTED_IDENT, s.src[s.start:s.cursor]}
}
// scanWhitespace consumes a run of spaces, tabs, newlines and carriage
// returns and returns it as a single WS token.
func (s *Lexer) scanWhitespace() Token {
	s.start = s.cursor
	for ch := s.next(); isWhitespace(ch); ch = s.next() {
		// keep consuming whitespace runes
	}
	return Token{WS, s.src[s.start:s.cursor]}
}
// scanOperator scans a run of operator characters into one OPERATOR token.
func (s *Lexer) scanOperator(lastCh rune) Token {
	s.start = s.cursor
	ch := s.next()
	for isOperator(ch) && !(lastCh == '=' && ch == '?') {
		// hack: we don't want to treat "=?" as a single operator
		lastCh = ch
		ch = s.next()
	}
	return Token{OPERATOR, s.src[s.start:s.cursor]}
}

// scanWildcard scans the single wildcard character "*".
func (s *Lexer) scanWildcard() Token {
	s.start = s.cursor
	s.next()
	return Token{WILDCARD, s.src[s.start:s.cursor]}
}

// scanSingleLineComment scans a comment running to end-of-line ("-- ..." or
// MySQL "# ...").
// NOTE(review): nextBy(2) assumes a two-character opener; for MySQL "#"
// comments this also consumes the character after "#" — confirm that is
// intended when "#" is immediately followed by a newline.
func (s *Lexer) scanSingleLineComment() Token {
	s.start = s.cursor
	ch := s.nextBy(2) // consume the opening dashes
	for ch != '\n' && !isEOF(ch) {
		ch = s.next()
	}
	return Token{COMMENT, s.src[s.start:s.cursor]}
}

// scanMultiLineComment scans a /* ... */ comment; an unterminated comment
// (e.g. a truncated query) yields ERROR.
func (s *Lexer) scanMultiLineComment() Token {
	s.start = s.cursor
	ch := s.nextBy(2) // consume the opening slash and asterisk
	for {
		if ch == '*' && s.lookAhead(1) == '/' {
			s.nextBy(2) // consume the closing asterisk and slash
			break
		}
		if isEOF(ch) {
			// encountered EOF before closing comment
			// this usually happens when the comment is truncated
			return Token{ERROR, s.src[s.start:s.cursor]}
		}
		ch = s.next()
	}
	return Token{MULTILINE_COMMENT, s.src[s.start:s.cursor]}
}

// scanPunctuation scans a single punctuation character.
func (s *Lexer) scanPunctuation() Token {
	s.start = s.cursor
	s.next()
	return Token{PUNCTUATION, s.src[s.start:s.cursor]}
}
// scanDollarQuotedString scans a PostgreSQL dollar-quoted string such as
// $tag$...$tag$ (or $$...$$). A $func$ tag is reported as
// DOLLAR_QUOTED_FUNCTION so its body can be further normalized/obfuscated;
// an unterminated string yields ERROR.
func (s *Lexer) scanDollarQuotedString() Token {
	s.start = s.cursor
	ch := s.next() // consume the dollar sign
	tagStart := s.cursor
	// scan up to the dollar sign that terminates the opening tag
	for s.cursor < len(s.src) && ch != '$' {
		ch = s.next()
	}
	s.next() // consume the closing dollar sign of the tag
	tag := s.src[tagStart-1 : s.cursor] // include the opening and closing dollar sign e.g. $tag$
	for s.cursor < len(s.src) {
		if s.matchAt([]rune(tag)) {
			s.nextBy(len(tag)) // consume the closing tag
			if tag == "$func$" {
				return Token{DOLLAR_QUOTED_FUNCTION, s.src[s.start:s.cursor]}
			}
			return Token{DOLLAR_QUOTED_STRING, s.src[s.start:s.cursor]}
		}
		s.next()
	}
	// reached EOF before finding the closing tag
	return Token{ERROR, s.src[s.start:s.cursor]}
}
// scanPositionalParameter scans a numbered parameter such as $1 or $23.
func (s *Lexer) scanPositionalParameter() Token {
	s.start = s.cursor
	// consume the dollar sign and first digit, then any remaining digits
	for ch := s.nextBy(2); isDigit(ch); ch = s.next() {
	}
	return Token{POSITIONAL_PARAMETER, s.src[s.start:s.cursor]}
}

// scanBindParameter scans a bind parameter such as :name (Oracle) or @name.
func (s *Lexer) scanBindParameter() Token {
	s.start = s.cursor
	// consume the leading colon/at sign and first char, then the rest of the name
	for ch := s.nextBy(2); isAlphaNumeric(ch); ch = s.next() {
	}
	return Token{BIND_PARAMETER, s.src[s.start:s.cursor]}
}

// scanSystemVariable scans a system variable such as @@version.
func (s *Lexer) scanSystemVariable() Token {
	s.start = s.cursor
	// consume the leading @@, then the variable name
	for ch := s.nextBy(2); isAlphaNumeric(ch); ch = s.next() {
	}
	return Token{SYSTEM_VARIABLE, s.src[s.start:s.cursor]}
}
// scanUnknown consumes a single unrecognized rune and returns it as UNKNOWN,
// allowing the lexer to make progress past input it does not understand.
func (s *Lexer) scanUnknown() Token {
	// When we see an unknown token, we advance the cursor until we see something that looks like a token boundary.
	s.start = s.cursor
	s.next()
	return Token{UNKNOWN, s.src[s.start:s.cursor]}
}

256
vendor/github.com/DataDog/go-sqllexer/sqllexer_utils.go generated vendored Normal file
View File

@@ -0,0 +1,256 @@
package sqllexer
import (
"strings"
"unicode"
)
// DBMSType identifies the SQL dialect a lexer should assume.
type DBMSType string

const (
	// DBMSSQLServer is a MS SQL Server
	DBMSSQLServer DBMSType = "mssql"
	// DBMSPostgres is a PostgreSQL Server
	DBMSPostgres DBMSType = "postgresql"
	// DBMSMySQL is a MySQL Server
	DBMSMySQL DBMSType = "mysql"
	// DBMSOracle is a Oracle Server
	DBMSOracle DBMSType = "oracle"
)
// commands is the set of statement-starting SQL keywords (upper-case keys)
// that identify what a statement does; consulted by isCommand.
var commands = map[string]bool{
"SELECT": true,
"INSERT": true,
"UPDATE": true,
"DELETE": true,
"CREATE": true,
"ALTER": true,
"DROP": true,
"JOIN": true,
"GRANT": true,
"REVOKE": true,
"COMMIT": true,
"BEGIN": true,
"TRUNCATE": true,
"MERGE": true,
"EXECUTE": true,
"EXEC": true,
"EXPLAIN": true,
"STRAIGHT_JOIN": true,
}
// tableIndicators is the set of keywords (upper-case keys) after which a
// table name is expected to follow; consulted by isTableIndicator.
var tableIndicators = map[string]bool{
"FROM": true,
"JOIN": true,
"INTO": true,
"UPDATE": true,
"TABLE": true,
"STRAIGHT_JOIN": true, // MySQL
}
// keywords is the set of SQL keywords (upper-case keys) recognized across
// the supported dialects; consulted by isSQLKeyword after upper-casing the
// identifier under test.
var keywords = map[string]bool{
	"SELECT":     true,
	"INSERT":     true,
	"UPDATE":     true,
	"DELETE":     true,
	"CREATE":     true,
	"ALTER":      true,
	"DROP":       true,
	"GRANT":      true,
	"REVOKE":     true,
	"ADD":        true,
	"ALL":        true,
	"AND":        true,
	"ANY":        true,
	"AS":         true,
	"ASC":        true,
	"BEGIN":      true,
	"BETWEEN":    true,
	"BY":         true,
	"CASE":       true,
	"CHECK":      true,
	"COLUMN":     true,
	"COMMIT":     true,
	"CONSTRAINT": true,
	"DATABASE":   true,
	"DECLARE":    true,
	"DEFAULT":    true,
	"DESC":       true,
	"DISTINCT":   true,
	"ELSE":       true,
	"END":        true,
	"EXEC":       true,
	"EXISTS":     true,
	"FOREIGN":    true,
	"FROM":       true,
	"GROUP":      true,
	"HAVING":     true,
	"IN":         true,
	"INDEX":      true,
	"INNER":      true,
	"INTO":       true,
	"IS":         true,
	"JOIN":       true,
	"KEY":        true,
	"LEFT":       true,
	"LIKE":       true,
	"LIMIT":      true,
	"NOT":        true,
	"ON":         true,
	"OR":         true,
	"ORDER":      true,
	"OUTER":      true,
	"PRIMARY":    true,
	"PROCEDURE":  true,
	"REPLACE":    true,
	"RETURNS":    true,
	"RIGHT":      true,
	"ROLLBACK":   true,
	"ROWNUM":     true,
	"SET":        true,
	"SOME":       true,
	"TABLE":      true,
	"TOP":        true,
	"TRUNCATE":   true,
	"UNION":      true,
	"UNIQUE":     true,
	"USE":        true,
	"VALUES":     true,
	"VIEW":       true,
	"WHERE":      true,
	"CUBE":       true,
	"ROLLUP":     true,
	"LITERAL":    true,
	"WINDOW":     true,
	// "VACCUM" is a historical misspelling kept for compatibility; the
	// correctly spelled PostgreSQL keyword is added below so real VACUUM
	// statements are recognized.
	"VACCUM":    true,
	"VACUUM":    true,
	"ANALYZE":   true,
	"ILIKE":     true,
	"USING":     true,
	"ASSERTION": true,
	"DOMAIN":    true,
	"CLUSTER":   true,
	"COPY":      true,
	"EXPLAIN":   true,
	"PLPGSQL":   true,
	"TRIGGER":   true,
	"TEMPORARY": true,
	"UNLOGGED":  true,
	"RECURSIVE": true,
	"RETURNING": true,
	"OFFSET":    true,
	"OF":        true,
	"SKIP":      true,
}
// isWhitespace reports whether ch is one of the four ASCII whitespace
// characters the lexer skips: space, tab, newline, carriage return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch rune) bool {
	return ch >= '0' && ch <= '9'
}
// isExpontent reports whether ch marks the exponent part of a numeric
// literal ('e' or 'E'). NOTE(review): the name is a misspelling of
// "exponent", kept unchanged because other code in this file calls it.
func isExpontent(ch rune) bool {
	switch ch {
	case 'e', 'E':
		return true
	}
	return false
}
// isLeadingSign reports whether ch is a numeric sign character ('+' or '-').
func isLeadingSign(ch rune) bool {
	switch ch {
	case '+', '-':
		return true
	}
	return false
}
// isLetter reports whether ch can start an identifier: any Unicode letter,
// or the underscore.
func isLetter(ch rune) bool {
	if ch == '_' {
		return true
	}
	return unicode.IsLetter(ch)
}
// isAlphaNumeric reports whether ch may appear inside an identifier:
// a letter (including underscore) or an ASCII digit.
func isAlphaNumeric(ch rune) bool {
	if isLetter(ch) {
		return true
	}
	return isDigit(ch)
}
// isDoubleQuote reports whether ch is the double-quote character (").
func isDoubleQuote(ch rune) bool {
	const doubleQuote = '"'
	return ch == doubleQuote
}
// isSingleQuote reports whether ch is the single-quote character (').
func isSingleQuote(ch rune) bool {
	const singleQuote = '\''
	return ch == singleQuote
}
// isOperator reports whether ch is one of the single-character SQL operator
// symbols the lexer recognizes.
func isOperator(ch rune) bool {
	return strings.ContainsRune("+-*/=<>!&|^%~?@:#", ch)
}
// isWildcard reports whether ch is the SQL wildcard asterisk (*).
func isWildcard(ch rune) bool {
	const wildcard = '*'
	return ch == wildcard
}
// isSingleLineComment reports whether the two runes start a SQL
// single-line comment ("--").
func isSingleLineComment(ch rune, nextCh rune) bool {
	if ch != '-' {
		return false
	}
	return nextCh == '-'
}
// isMultiLineComment reports whether the two runes start a SQL
// multi-line comment ("/*").
func isMultiLineComment(ch rune, nextCh rune) bool {
	if ch != '/' {
		return false
	}
	return nextCh == '*'
}
// isPunctuation reports whether ch is one of the punctuation characters the
// lexer emits as standalone tokens. Note ':' is also listed in isOperator.
func isPunctuation(ch rune) bool {
	return strings.ContainsRune("(),;.:[]{}", ch)
}
// isEOF reports whether ch is the NUL rune used to signal end of input.
func isEOF(ch rune) bool {
	const eof rune = 0
	return ch == eof
}
// isCommand reports whether ident is in the commands set. The lookup is
// case-sensitive; the set's keys are upper-case.
func isCommand(ident string) bool {
	return commands[ident]
}
// isTableIndicator reports whether ident is in the tableIndicators set.
// The lookup is case-sensitive; the set's keys are upper-case.
func isTableIndicator(ident string) bool {
	return tableIndicators[ident]
}
// isSQLKeyword reports whether token is an identifier whose upper-cased
// value appears in the keywords set; non-identifier tokens are never
// keywords.
func isSQLKeyword(token *Token) bool {
	if token.Type != IDENT {
		return false
	}
	return keywords[strings.ToUpper(token.Value)]
}
// isProcedure reports whether token is the identifier PROCEDURE or PROC
// (case-insensitive), i.e. the keyword that introduces a stored-procedure
// definition. Non-identifier tokens never match.
func isProcedure(token *Token) bool {
	if token.Type != IDENT {
		return false
	}
	// Upper-case once rather than once per comparison.
	upper := strings.ToUpper(token.Value)
	return upper == "PROCEDURE" || upper == "PROC"
}
// isBoolean reports whether ident is the boolean literal TRUE or FALSE
// (case-insensitive).
func isBoolean(ident string) bool {
	// Upper-case once rather than once per comparison.
	upper := strings.ToUpper(ident)
	return upper == "TRUE" || upper == "FALSE"
}
// isNull reports whether ident is the NULL literal (case-insensitive).
func isNull(ident string) bool {
	return strings.EqualFold(ident, "NULL")
}
// replaceDigits replaces each maximal run of ASCII digits in input with
// placeholder (e.g. "user123" with placeholder "?" becomes "user?").
// Non-digit bytes are copied through unchanged, so multi-byte UTF-8
// sequences are preserved byte-for-byte.
func replaceDigits(input string, placeholder string) string {
	var builder strings.Builder
	// Pre-size for the common case where little or nothing is replaced.
	builder.Grow(len(input))
	for i := 0; i < len(input); {
		if c := input[i]; c >= '0' && c <= '9' {
			builder.WriteString(placeholder)
			// Skip the remainder of this digit run.
			for i < len(input) && input[i] >= '0' && input[i] <= '9' {
				i++
			}
		} else {
			builder.WriteByte(c)
			i++
		}
	}
	return builder.String()
}
// trimQuotes removes every occurrence of delim and closingDelim from input.
// NOTE: despite the name, occurrences anywhere in the string are removed,
// not only a surrounding pair.
func trimQuotes(input string, delim string, closingDelim string) string {
	return strings.NewReplacer(delim, "", closingDelim, "").Replace(input)
}

View File

@@ -0,0 +1,54 @@
# Test Suite
The test suite is a collection of test SQL statements that are organized per DBMS. The test suite is used to test the SQL obfuscator and normalizer for correctness and completeness. It is also intended to cover DBMS specific edge cases, that are not covered by the generic unit tests.
## Test Suite Structure
The test suite is organized in the following way:
```text
testdata
├── README.md
├── dbms1
│   ├── query_type1
│   │   ├── test1.json
│   └── query_type2
│   ├── test1.json
dbms_test.go
```
The test suite is organized per DBMS. Each DBMS has a number of query types. Each query type has a number of test cases. Each test case consists of a SQL statement and the expected output of the obfuscator/normalizer.
## Test File Format
The test files are simple JSON files where each test case comes with one input SQL statement and an array of expected outputs.
Each expected output can optionally come with a configuration for the obfuscator and normalizer. The configuration is optional, because the default configuration is used if no configuration is provided.
testcase.json:
```json
{
"input": "SELECT * FROM table1",
"outputs": [
{
// Test case 1
"expected": "SELECT * FROM table1",
"obfuscator_config": {...}, // optional
"normalizer_config": {...} // optional
},
{
// Test case 2
"expected": "SELECT * FROM table1",
"obfuscator_config": {...}, // optional
"normalizer_config": {...} // optional
}
]
}
```
## How to write a new test case
1. Create a new directory for the DBMS, if it does not exist yet. (this step is often not necessary)
2. Create a new directory for the query type, if it does not exist yet.
3. Create a new test case `.json` file with the SQL statement and expected output. Refer to the [test file format](#test-file-format) or `testcase struct` in [dbms_test.go](../dbms_test.go) for more details.
4. Run the test suite to verify that the test case is working as expected.

View File

@@ -0,0 +1,16 @@
{
"input": "WITH ComplexCTE AS (SELECT t1.id, t2.amount, ROW_NUMBER() OVER(PARTITION BY t1.customer_id ORDER BY t2.amount DESC) AS rn FROM (SELECT id, customer_id, status FROM orders WHERE YEAR(order_date) = YEAR(GETDATE()) AND status NOT IN ('Cancelled', 'Returned')) t1 INNER JOIN (SELECT order_id, SUM(amount) AS amount FROM order_details GROUP BY order_id) t2 ON t1.id = t2.order_id WHERE t2.amount > 500), SecondCTE AS (SELECT c1.*, c2.name, c2.region FROM ComplexCTE c1 INNER JOIN customers c2 ON c1.customer_id = c2.id WHERE c2.region IN ('East', 'West') AND c1.rn < 5) SELECT s.id, s.name, s.amount, p.product_name, CASE WHEN s.amount > 1000 THEN 'High' ELSE 'Low' END AS ValueCategory FROM SecondCTE s LEFT JOIN (SELECT DISTINCT p1.order_id, p2.product_name FROM order_products p1 INNER JOIN products p2 ON p1.product_id = p2.id) p ON s.id = p.order_id WHERE s.region = 'East' AND s.status LIKE '%Active%' ORDER BY s.amount DESC, s.name;",
"outputs": [
{
"expected": "WITH ComplexCTE AS ( SELECT t?.id, t?.amount, ROW_NUMBER ( ) OVER ( PARTITION BY t?.customer_id ORDER BY t?.amount DESC ) FROM ( SELECT id, customer_id, status FROM orders WHERE YEAR ( order_date ) = YEAR ( GETDATE ( ) ) AND status NOT IN ( ? ) ) t? INNER JOIN ( SELECT order_id, SUM ( amount ) FROM order_details GROUP BY order_id ) t? ON t?.id = t?.order_id WHERE t?.amount > ? ), SecondCTE AS ( SELECT c?. *, c?.name, c?.region FROM ComplexCTE c? INNER JOIN customers c? ON c?.customer_id = c?.id WHERE c?.region IN ( ? ) AND c?.rn < ? ) SELECT s.id, s.name, s.amount, p.product_name, CASE WHEN s.amount > ? THEN ? ELSE ? END FROM SecondCTE s LEFT JOIN ( SELECT DISTINCT p?.order_id, p?.product_name FROM order_products p? INNER JOIN products p? ON p?.product_id = p?.id ) p ON s.id = p.order_id WHERE s.region = ? AND s.status LIKE ? ORDER BY s.amount DESC, s.name",
"statement_metadata": {
"size": 79,
"tables": ["orders", "order_details", "ComplexCTE", "customers", "SecondCTE", "order_products", "products"],
"commands": ["SELECT", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,17 @@
{
"input": "CREATE VIEW dbo.OrderSummary WITH SCHEMABINDING AS SELECT customer_id, COUNT_BIG(*) AS TotalOrders, SUM(amount) AS TotalAmount FROM dbo.orders GROUP BY customer_id; CREATE UNIQUE CLUSTERED INDEX IDX_V1 ON dbo.OrderSummary(customer_id);",
"outputs": [
{
"expected": "CREATE VIEW dbo.OrderSummary WITH SCHEMABINDING AS SELECT customer_id, COUNT_BIG ( * ), SUM ( amount ) FROM dbo.orders GROUP BY customer_id; CREATE UNIQUE CLUSTERED INDEX IDX_V? ON dbo.OrderSummary ( customer_id )",
"statement_metadata": {
"size": 22,
"tables": ["dbo.orders"],
"commands": ["CREATE", "SELECT"],
"comments": [],
"procedures": [],
"views": ["dbo.OrderSummary"]
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "CREATE PARTITION FUNCTION myRangePF1 (INT) AS RANGE LEFT FOR VALUES (1, 100, 1000); CREATE PARTITION SCHEME myScheme AS PARTITION myRangePF1 TO ([PRIMARY], [SECONDARY], [TERTIARY]); CREATE TABLE partitionedTable (id INT) ON myScheme(id);",
"outputs": [
{
"expected": "CREATE PARTITION FUNCTION myRangePF? ( INT ) LEFT FOR VALUES ( ? ); CREATE PARTITION SCHEME myScheme myRangePF? TO ( PRIMARY, SECONDARY, TERTIARY ); CREATE TABLE partitionedTable ( id INT ) ON myScheme ( id )",
"statement_metadata": {
"size": 22,
"tables": ["partitionedTable"],
"commands": ["CREATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT a.*, b.totalAmount, CASE WHEN c.id IS NOT NULL THEN d.description ELSE 'N/A' END AS description FROM (SELECT id, name, status, customer_id FROM orders WHERE order_date > DATEADD(month, -6, GETDATE()) AND status IN ('Pending', 'Completed') AND customer_id IN (SELECT customer_id FROM customers WHERE region IN ('East', 'West') AND last_order_date > DATEADD(year, -1, GETDATE())) ORDER BY name DESC) a INNER JOIN (SELECT order_id, SUM(amount) AS totalAmount FROM order_details GROUP BY order_id) b ON a.id = b.order_id LEFT JOIN audit_log c ON a.id = c.order_id LEFT JOIN (SELECT DISTINCT status, description FROM status_descriptions) d ON a.status = d.status WHERE a.name LIKE '%test%' AND (b.totalAmount > 1000 OR b.totalAmount IS NULL) ORDER BY a.order_date DESC, a.name;",
"outputs": [
{
"expected": "SELECT a. *, b.totalAmount, CASE WHEN c.id IS NOT ? THEN d.description ELSE ? END FROM ( SELECT id, name, status, customer_id FROM orders WHERE order_date > DATEADD ( month, ?, GETDATE ( ) ) AND status IN ( ? ) AND customer_id IN ( SELECT customer_id FROM customers WHERE region IN ( ? ) AND last_order_date > DATEADD ( year, ?, GETDATE ( ) ) ) ORDER BY name DESC ) a INNER JOIN ( SELECT order_id, SUM ( amount ) FROM order_details GROUP BY order_id ) b ON a.id = b.order_id LEFT JOIN audit_log c ON a.id = c.order_id LEFT JOIN ( SELECT DISTINCT status, description FROM status_descriptions ) d ON a.status = d.status WHERE a.name LIKE ? AND ( b.totalAmount > ? OR b.totalAmount IS ? ) ORDER BY a.order_date DESC, a.name",
"statement_metadata": {
"size": 66,
"tables": ["orders", "customers", "order_details", "audit_log", "status_descriptions"],
"commands": ["SELECT", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE status = CASE WHEN order_date < GETDATE() - 90 THEN 'Expired' ELSE 'Active' END;",
"outputs": [
{
"expected": "DELETE FROM orders WHERE status = CASE WHEN order_date < GETDATE ( ) - ? THEN ? ELSE ? END",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE status = 'Cancelled';",
"outputs": [
{
"expected": "DELETE FROM orders WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM customers WHERE region = 'North'; -- Assuming CASCADE DELETE is set up on the foreign key in the orders table",
"outputs": [
{
"expected": "DELETE FROM customers WHERE region = ?",
"statement_metadata": {
"size": 90,
"tables": ["customers"],
"commands": ["DELETE"],
"comments": ["-- Assuming CASCADE DELETE is set up on the foreign key in the orders table"],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WITH (ROWLOCK) WHERE status = 'Pending';",
"outputs": [
{
"expected": "DELETE FROM orders WITH ( ROWLOCK ) WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE customer_id IN (SELECT id FROM customers WHERE region = 'West');",
"outputs": [
{
"expected": "DELETE FROM orders WHERE customer_id IN ( SELECT id FROM customers WHERE region = ? )",
"statement_metadata": {
"size": 27,
"tables": ["orders", "customers"],
"commands": ["DELETE", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DECLARE @ExpiredOrders TABLE (id INT); INSERT INTO @ExpiredOrders (id) SELECT id FROM orders WHERE order_date < GETDATE() - 365; DELETE FROM orders WHERE id IN (SELECT id FROM @ExpiredOrders);",
"outputs": [
{
"expected": "DECLARE @ExpiredOrders TABLE ( id INT ); INSERT INTO @ExpiredOrders ( id ) SELECT id FROM orders WHERE order_date < GETDATE ( ) - ?; DELETE FROM orders WHERE id IN ( SELECT id FROM @ExpiredOrders )",
"statement_metadata": {
"size": 24,
"tables": ["orders"],
"commands": ["INSERT", "SELECT", "DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "WITH OldOrders AS (SELECT id FROM orders WHERE order_date < '2022-01-01') DELETE FROM orders WHERE id IN (SELECT id FROM OldOrders);",
"outputs": [
{
"expected": "WITH OldOrders AS ( SELECT id FROM orders WHERE order_date < ? ) DELETE FROM orders WHERE id IN ( SELECT id FROM OldOrders )",
"statement_metadata": {
"size": 27,
"tables": ["orders", "OldOrders"],
"commands": ["SELECT", "DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE o FROM orders o INNER JOIN customers c ON o.customer_id = c.id WHERE c.region = 'East' AND o.status = 'Pending';",
"outputs": [
{
"expected": "DELETE o FROM orders o INNER JOIN customers c ON o.customer_id = c.id WHERE c.region = ? AND o.status = ?",
"statement_metadata": {
"size": 25,
"tables": ["orders", "customers"],
"commands": ["DELETE", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders OUTPUT DELETED.* WHERE status = 'Shipped';",
"outputs": [
{
"expected": "DELETE FROM orders OUTPUT DELETED. * WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE TOP (10) FROM orders WHERE status = 'Pending';",
"outputs": [
{
"expected": "DELETE TOP ( ? ) FROM orders WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO orders (customer_id, order_date, status) VALUES (1, GETDATE(), 'Pending');",
"outputs": [
{
"expected": "INSERT INTO orders ( customer_id, order_date, status ) VALUES ( ?, GETDATE ( ), ? )",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO orders DEFAULT VALUES;",
"outputs": [
{
"expected": "INSERT INTO orders DEFAULT VALUES",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SET IDENTITY_INSERT orders ON; INSERT INTO orders (id, customer_id, order_date, status) VALUES (100, 3, GETDATE(), 'Pending'); SET IDENTITY_INSERT orders OFF;",
"outputs": [
{
"expected": "SET IDENTITY_INSERT orders ON; INSERT INTO orders ( id, customer_id, order_date, status ) VALUES ( ?, GETDATE ( ), ? ); SET IDENTITY_INSERT orders OFF",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "MERGE INTO orders AS target USING (SELECT customer_id, order_date, status FROM incoming_orders) AS source ON target.id = source.id WHEN NOT MATCHED THEN INSERT (customer_id, order_date, status) VALUES (source.customer_id, source.order_date, source.status);",
"outputs": [
{
"expected": "MERGE INTO orders USING ( SELECT customer_id, order_date, status FROM incoming_orders ) ON target.id = source.id WHEN NOT MATCHED THEN INSERT ( customer_id, order_date, status ) VALUES ( source.customer_id, source.order_date, source.status )",
"statement_metadata": {
"size": 38,
"tables": ["orders", "incoming_orders"],
"commands": ["MERGE", "SELECT", "INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO orders (customer_id, order_date, status) OUTPUT INSERTED.id VALUES (3, GETDATE(), 'Processing');",
"outputs": [
{
"expected": "INSERT INTO orders ( customer_id, order_date, status ) OUTPUT INSERTED.id VALUES ( ?, GETDATE ( ), ? )",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT * INTO new_orders FROM orders WHERE status = 'Pending';",
"outputs": [
{
"expected": "SELECT * INTO new_orders FROM orders WHERE status = ?",
"statement_metadata": {
"size": 22,
"tables": ["new_orders", "orders"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO order_totals (order_id, total_amount) VALUES ((SELECT MAX(id) FROM orders), 500);",
"outputs": [
{
"expected": "INSERT INTO order_totals ( order_id, total_amount ) VALUES ( ( SELECT MAX ( id ) FROM orders ), ? )",
"statement_metadata": {
"size": 30,
"tables": ["order_totals", "orders"],
"commands": ["INSERT", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO top_orders (id, amount) SELECT TOP 5 id, amount FROM orders ORDER BY amount DESC;",
"outputs": [
{
"expected": "INSERT INTO top_orders ( id, amount ) SELECT TOP ? id, amount FROM orders ORDER BY amount DESC",
"statement_metadata": {
"size": 28,
"tables": ["top_orders", "orders"],
"commands": ["INSERT", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO customers (name, region) VALUES ('John Doe', 'North'), ('Jane Smith', 'South');",
"outputs": [
{
"expected": "INSERT INTO customers ( name, region ) VALUES ( ? ), ( ? )",
"statement_metadata": {
"size": 15,
"tables": ["customers"],
"commands": ["INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO orders_archive (id, customer_id, order_date, status) SELECT id, customer_id, order_date, status FROM orders WHERE status = 'Completed';",
"outputs": [
{
"expected": "INSERT INTO orders_archive ( id, customer_id, order_date, status ) SELECT id, customer_id, order_date, status FROM orders WHERE status = ?",
"statement_metadata": {
"size": 32,
"tables": ["orders_archive", "orders"],
"commands": ["INSERT", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "BEGIN TRY INSERT INTO orders (customer_id, amount) VALUES (1, -100); END TRY BEGIN CATCH THROW; END CATCH;",
"outputs": [
{
"expected": "BEGIN TRY INSERT INTO orders ( customer_id, amount ) VALUES ( ? ); END TRY BEGIN CATCH THROW; END CATCH",
"statement_metadata": {
"size": 17,
"tables": ["orders"],
"commands": ["BEGIN", "INSERT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE ManageCustomerOrders @customerId INT AS BEGIN SET NOCOUNT ON; IF NOT EXISTS (SELECT 1 FROM customers WHERE id = @customerId) BEGIN THROW 50001, 'Customer not found.', 1; END; UPDATE orders SET status = 'Reviewed' WHERE customer_id = @customerId AND status = 'Pending'; INSERT INTO audit_log (description) VALUES ('Orders reviewed for customer ' + CAST(@customerId AS NVARCHAR(10))); END;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE ManageCustomerOrders @customerId INT AS BEGIN SET NOCOUNT ON; IF NOT EXISTS (SELECT ? FROM customers WHERE id = @customerId) BEGIN THROW ?, ?, ?; END; UPDATE orders SET status = ? WHERE customer_id = @customerId AND status = ?; INSERT INTO audit_log (description) VALUES (? + CAST(@customerId AS NVARCHAR(?))); END;",
"statement_metadata": {
"size": 78,
"tables": ["customers", "orders", "audit_log"],
"commands": ["CREATE", "ALTER", "BEGIN", "SELECT", "UPDATE", "INSERT"],
"comments": [],
"procedures": ["ManageCustomerOrders"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE FullOrderManagement AS\nBEGIN\n-- Comprehensive procedure to manage order lifecycle\n-- It checks, processes, and logs orders.\nSET NOCOUNT ON;\n-- Check for new orders\nUPDATE orders SET status = 'Processing' WHERE status = 'New';\n-- Log the update\nINSERT INTO audit_log (description) VALUES ('Processed new orders.');\n-- Finalize processed orders\nUPDATE orders SET status = 'Finalized' WHERE status = 'Processing';\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE FullOrderManagement AS BEGIN SET NOCOUNT ON; UPDATE orders SET status = ? WHERE status = ?; INSERT INTO audit_log (description) VALUES (?); UPDATE orders SET status = ? WHERE status = ?; END;",
"statement_metadata": {
"size": 223,
"tables": ["orders", "audit_log"],
"commands": ["CREATE", "ALTER", "BEGIN", "UPDATE", "INSERT"],
"comments": ["-- Comprehensive procedure to manage order lifecycle", "-- It checks, processes, and logs orders.", "-- Check for new orders", "-- Log the update", "-- Finalize processed orders"],
"procedures": ["FullOrderManagement"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE ValidateOrderDetails AS\nBEGIN\n/*\n Procedure Name: ValidateOrderDetails\n Purpose: To validate the details of orders before processing.\n Detailed Description:\n This procedure runs through each order in the 'orders' table\n and checks if all required details are present.\n It updates the 'order_status' table with 'Valid' or 'Invalid'.\n It's a critical part of the order processing pipeline to ensure data integrity.\n*/\n-- Validation logic\nUPDATE orders SET status = CASE WHEN customer_id IS NOT NULL AND total_amount IS NOT NULL THEN 'Valid' ELSE 'Invalid' END;\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE ValidateOrderDetails AS BEGIN UPDATE orders SET status = CASE WHEN customer_id IS NOT NULL AND total_amount IS NOT NULL THEN ? ELSE ? END; END;",
"statement_metadata": {
"size": 466,
"tables": ["orders"],
"commands": ["CREATE", "ALTER", "BEGIN", "UPDATE"],
"comments": ["/*\n Procedure Name: ValidateOrderDetails\n Purpose: To validate the details of orders before processing.\n Detailed Description:\n This procedure runs through each order in the 'orders' table\n and checks if all required details are present.\n It updates the 'order_status' table with 'Valid' or 'Invalid'.\n It's a critical part of the order processing pipeline to ensure data integrity.\n*/", "-- Validation logic"],
"procedures": ["ValidateOrderDetails"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE CheckOrderStatus @orderId INT AS\nBEGIN\n-- Checks the status of an order and logs if it's delayed.\n-- This is part of our order monitoring system.\nSET NOCOUNT ON;\nDECLARE @status NVARCHAR(50);\nSELECT @status = status FROM orders WHERE id = @orderId;\nIF @status = 'Delayed'\nBEGIN\n INSERT INTO audit_log (description) VALUES ('Order ' + CAST(@orderId AS NVARCHAR(10)) + ' is delayed.');\nEND\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE CheckOrderStatus @orderId INT AS BEGIN SET NOCOUNT ON; DECLARE @status NVARCHAR(?); SELECT @status = status FROM orders WHERE id = @orderId; IF @status = ? BEGIN INSERT INTO audit_log (description) VALUES (? + CAST(@orderId AS NVARCHAR(?)) + ?); END END;",
"statement_metadata": {
"size": 164,
"tables": ["orders", "audit_log"],
"commands": ["CREATE", "ALTER", "BEGIN", "SELECT", "INSERT"],
"comments": ["-- Checks the status of an order and logs if it's delayed.", "-- This is part of our order monitoring system."],
"procedures": ["CheckOrderStatus"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE ArchiveOldOrders AS BEGIN SET NOCOUNT ON; DECLARE @orderId INT; DECLARE orderCursor CURSOR FOR SELECT id FROM orders WHERE order_date < GETDATE() - 365; OPEN orderCursor; FETCH NEXT FROM orderCursor INTO @orderId; WHILE @@FETCH_STATUS = 0 BEGIN INSERT INTO orders_archive (id, status) SELECT id, status FROM orders WHERE id = @orderId; FETCH NEXT FROM orderCursor INTO @orderId; END; CLOSE orderCursor; DEALLOCATE orderCursor; END;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE ArchiveOldOrders AS BEGIN SET NOCOUNT ON; DECLARE @orderId INT; DECLARE orderCursor CURSOR FOR SELECT id FROM orders WHERE order_date < GETDATE() - ?; OPEN orderCursor; FETCH NEXT FROM orderCursor INTO @orderId; WHILE @@FETCH_STATUS = ? BEGIN INSERT INTO orders_archive (id, status) SELECT id, status FROM orders WHERE id = @orderId; FETCH NEXT FROM orderCursor INTO @orderId; END; CLOSE orderCursor; DEALLOCATE orderCursor; END;",
"statement_metadata": {
"size": 75,
"tables": ["orders", "orderCursor", "orders_archive"],
"commands": ["CREATE", "ALTER", "BEGIN", "SELECT", "INSERT"],
"comments": [],
"procedures": ["ArchiveOldOrders"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE AuditOrderProcessing AS\nBEGIN\n/*\n Procedure: AuditOrderProcessing\n Author: Jane Doe\n Created: 2023-04-15\n Description: This procedure is designed to audit order processing steps.\n It checks each step of the order processing workflow and logs it into the audit_log table.\n Modifications:\n - 2023-04-20: Added additional logging for failed orders.\n - 2023-05-01: Updated logic to include new order status.\n*/\nSET NOCOUNT ON;\n-- Insert audit records\nINSERT INTO audit_log (description) SELECT 'Order processed: ' + CAST(id AS NVARCHAR(10)) FROM orders WHERE status = 'Processed';\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE AuditOrderProcessing AS BEGIN SET NOCOUNT ON; INSERT INTO audit_log (description) SELECT ? + CAST(id AS NVARCHAR(?)) FROM orders WHERE status = ?; END;",
"statement_metadata": {
"size": 478,
"tables": ["audit_log", "orders"],
"commands": ["CREATE", "ALTER", "BEGIN", "INSERT", "SELECT"],
"comments": ["/*\n Procedure: AuditOrderProcessing\n Author: Jane Doe\n Created: 2023-04-15\n Description: This procedure is designed to audit order processing steps.\n It checks each step of the order processing workflow and logs it into the audit_log table.\n Modifications:\n - 2023-04-20: Added additional logging for failed orders.\n - 2023-05-01: Updated logic to include new order status.\n*/", "-- Insert audit records"],
"procedures": ["AuditOrderProcessing"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE UpdateOrderStatus @orderId INT, @newStatus NVARCHAR(50) AS BEGIN SET NOCOUNT ON; BEGIN TRY BEGIN TRANSACTION; DECLARE @sql NVARCHAR(MAX) = N'UPDATE orders SET status = ''' + @newStatus + ''' WHERE id = ' + CAST(@orderId AS NVARCHAR(10)) + ';'; EXEC sp_executesql @sql; COMMIT TRANSACTION; END TRY BEGIN CATCH ROLLBACK TRANSACTION; THROW; END CATCH; END;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE UpdateOrderStatus @orderId INT, @newStatus NVARCHAR(?) AS BEGIN SET NOCOUNT ON; BEGIN TRY BEGIN TRANSACTION; DECLARE @sql NVARCHAR(MAX) = N ? ? + @newStatus + ? ? + CAST(@orderId AS NVARCHAR(?)) + ?; EXEC sp_executesql @sql; COMMIT TRANSACTION; END TRY BEGIN CATCH ROLLBACK TRANSACTION; THROW; END CATCH; END;",
"statement_metadata": {
"size": 43,
"tables": [],
"commands": ["CREATE", "ALTER", "BEGIN", "EXEC", "COMMIT"],
"comments": [],
"procedures": ["UpdateOrderStatus"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE DynamicCustomerQuery @query NVARCHAR(MAX) AS\nBEGIN\n-- Executes a dynamic SQL query based on the input.\n-- Used for flexible customer data retrieval.\nSET NOCOUNT ON;\nEXEC sp_executesql @query;\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE DynamicCustomerQuery @query NVARCHAR(MAX) AS BEGIN SET NOCOUNT ON; EXEC sp_executesql @query; END;",
"statement_metadata": {
"size": 136,
"tables": [],
"commands": ["CREATE", "ALTER", "BEGIN", "EXEC"],
"comments": ["-- Executes a dynamic SQL query based on the input.", "-- Used for flexible customer data retrieval."],
"procedures": ["DynamicCustomerQuery"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE FullOrderProcessing AS BEGIN SET NOCOUNT ON; EXEC ProcessOrders; EXEC UpdateOrderStatus 1, 'Dispatched'; END;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE FullOrderProcessing AS BEGIN SET NOCOUNT ON; EXEC ProcessOrders; EXEC UpdateOrderStatus ?, ?; END;",
"statement_metadata": {
"size": 39,
"tables": [],
"commands": ["CREATE", "ALTER", "BEGIN", "EXEC"],
"comments": [],
"procedures": ["FullOrderProcessing"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE ProcessOrders AS BEGIN SET NOCOUNT ON; BEGIN TRANSACTION; CREATE TABLE #TempOrders (id INT, status NVARCHAR(50)); INSERT INTO #TempOrders (id, status) SELECT id, status FROM orders WHERE status = 'Pending'; UPDATE orders SET status = 'Processing' WHERE status = 'Pending'; COMMIT TRANSACTION; SELECT * FROM #TempOrders; DROP TABLE #TempOrders; END;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE ProcessOrders AS BEGIN SET NOCOUNT ON; BEGIN TRANSACTION; CREATE TABLE #TempOrders (id INT, status NVARCHAR(?)); INSERT INTO #TempOrders (id, status) SELECT id, status FROM orders WHERE status = ?; UPDATE orders SET status = ? WHERE status = ?; COMMIT TRANSACTION; SELECT * FROM #TempOrders; DROP TABLE #TempOrders; END;",
"statement_metadata": {
"size": 74,
"tables": ["#TempOrders", "orders"],
"commands": ["CREATE", "ALTER", "BEGIN", "INSERT", "SELECT", "UPDATE", "COMMIT", "DROP"],
"comments": [],
"procedures": ["ProcessOrders"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE ProcessPayment @orderId INT, @amount MONEY AS\nBEGIN\n-- This procedure processes payments for orders.\n-- It includes error handling using TRY-CATCH.\nSET NOCOUNT ON;\nBEGIN TRY\n -- Attempt to process the payment\n UPDATE orders SET payment_received = 1, payment_amount = @amount WHERE id = @orderId;\nEND TRY\nBEGIN CATCH\n -- Handle the error\n INSERT INTO error_log (error_message) VALUES (ERROR_MESSAGE());\nEND CATCH\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE ProcessPayment @orderId INT, @amount MONEY AS BEGIN SET NOCOUNT ON; BEGIN TRY UPDATE orders SET payment_received = ?, payment_amount = @amount WHERE id = @orderId; END TRY BEGIN CATCH INSERT INTO error_log (error_message) VALUES (ERROR_MESSAGE()); END CATCH END;",
"statement_metadata": {
"size": 203,
"tables": ["orders", "error_log"],
"commands": ["CREATE", "ALTER", "BEGIN", "UPDATE", "INSERT"],
"comments": ["-- This procedure processes payments for orders.", "-- It includes error handling using TRY-CATCH.", "-- Attempt to process the payment", "-- Handle the error"],
"procedures": ["ProcessPayment"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,27 @@
{
"input": "CREATE OR ALTER PROCEDURE AuditOrderProcessing AS\nBEGIN\n/*\n Procedure: AuditOrderProcessing\n Author: Jane Doe\n Created: 2023-04-15\n Description: This procedure is designed to audit order processing steps.\n It checks each step of the order processing workflow and logs it into the audit_log table.\n Modifications:\n - 2023-04-20: Added additional logging for failed orders.\n - 2023-05-01: Updated logic to include new order status.\n*/\nSET NOCOUNT ON;\n-- Insert audit records\nINSERT INTO audit_log (description) SELECT 'Order processed: ' + CAST(id AS NVARCHAR(10)) FROM orders WHERE status = 'Processed';\nEND;",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE AuditOrderProcessing AS BEGIN SET NOCOUNT ON; INSERT INTO audit_log (description) SELECT ? + CAST(id AS NVARCHAR(?)) FROM orders WHERE status = ?; END;",
"statement_metadata": {
"size": 478,
"tables": ["audit_log", "orders"],
"commands": ["CREATE", "ALTER", "BEGIN", "INSERT", "SELECT"],
"comments": ["/*\n Procedure: AuditOrderProcessing\n Author: Jane Doe\n Created: 2023-04-15\n Description: This procedure is designed to audit order processing steps.\n It checks each step of the order processing workflow and logs it into the audit_log table.\n Modifications:\n - 2023-04-20: Added additional logging for failed orders.\n - 2023-05-01: Updated logic to include new order status.\n*/", "-- Insert audit records"],
"procedures": ["AuditOrderProcessing"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,26 @@
{
"input": "CREATE OR ALTER PROCEDURE UpdateCustomerStatus @customerId INT, @newStatus NVARCHAR(50) AS\nBEGIN\n-- This procedure updates the status of a customer.\n-- It takes the customer ID and the new status as parameters.\nSET NOCOUNT ON;\nUPDATE customers SET status = @newStatus WHERE id = @customerId;\nEND;\nEXEC UpdateCustomerStatus 123, 'Active';",
"outputs": [
{
"expected": "CREATE OR ALTER PROCEDURE UpdateCustomerStatus @customerId INT, @newStatus NVARCHAR(?) AS BEGIN SET NOCOUNT ON; UPDATE customers SET status = @newStatus WHERE id = @customerId; END; EXEC UpdateCustomerStatus ?, ?;",
"statement_metadata": {
"size": 167,
"tables": ["customers"],
"commands": ["CREATE", "ALTER", "BEGIN", "UPDATE", "EXEC"],
"comments": ["-- This procedure updates the status of a customer.", "-- It takes the customer ID and the new status as parameters."],
"procedures": ["UpdateCustomerStatus"]
},
"obfuscator_config": {
"replace_digits": true
},
"normalizer_config": {
"collect_tables": true,
"collect_commands": true,
"collect_comments": true,
"collect_procedure": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT id, name, email FROM customers WHERE active = 1;",
"outputs": [
{
"expected": "SELECT id, name, email FROM customers WHERE active = ?",
"statement_metadata": {
"size": 15,
"tables": ["customers"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "CREATE TABLE orders_compressed WITH (DATA_COMPRESSION = PAGE) AS SELECT * FROM orders;",
"outputs": [
{
"expected": "CREATE TABLE orders_compressed WITH ( DATA_COMPRESSION = PAGE ) AS SELECT * FROM orders",
"statement_metadata": {
"size": 35,
"tables": ["orders_compressed", "orders"],
"commands": ["CREATE", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "CREATE TABLE DocumentStore AS FileTable;",
"outputs": [
{
"expected": "CREATE TABLE DocumentStore",
"statement_metadata": {
"size": 19,
"tables": ["DocumentStore"],
"commands": ["CREATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT * FROM (SELECT customer_id, product_id, amount FROM order_details) AS SourceTable PIVOT (SUM(amount) FOR product_id IN ([1], [2], [3])) AS PivotTable;",
"outputs": [
{
"expected": "SELECT * FROM ( SELECT customer_id, product_id, amount FROM order_details ) PIVOT ( SUM ( amount ) FOR product_id IN ( ? ) )",
"statement_metadata": {
"size": 19,
"tables": ["order_details"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT id, name, CHOOSE(department_id, 'Sales', 'Engineering', 'HR') AS DepartmentName FROM employees;",
"outputs": [
{
"expected": "SELECT id, name, CHOOSE ( department_id, ?, ?, ? ) FROM employees",
"statement_metadata": {
"size": 15,
"tables": ["employees"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT name, FORMAT(joining_date, 'dd-MM-yyyy') AS FormattedJoiningDate FROM employees;",
"outputs": [
{
"expected": "SELECT name, FORMAT ( joining_date, ? ) FROM employees",
"statement_metadata": {
"size": 15,
"tables": ["employees"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT c.name, o.order_date FROM customers c FULL OUTER JOIN orders o ON c.id = o.customer_id WHERE c.region = 'West' OR o.amount > 500;",
"outputs": [
{
"expected": "SELECT c.name, o.order_date FROM customers c FULL OUTER JOIN orders o ON c.id = o.customer_id WHERE c.region = ? OR o.amount > ?",
"statement_metadata": {
"size": 25,
"tables": ["customers", "orders"],
"commands": ["SELECT", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "INSERT INTO employees (name, department) VALUES ('John Doe', 'Sales'); SELECT @@IDENTITY AS LastInsertedIdentity;",
"outputs": [
{
"expected": "INSERT INTO employees ( name, department ) VALUES ( ? ); SELECT @@IDENTITY",
"statement_metadata": {
"size": 21,
"tables": ["employees"],
"commands": ["INSERT", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT name, IIF(gender = 'M', 'Male', 'Female') AS GenderDescription FROM employees;",
"outputs": [
{
"expected": "SELECT name, IIF ( gender = ?, ?, ? ) FROM employees",
"statement_metadata": {
"size": 15,
"tables": ["employees"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT c.name, SUM(o.amount) AS total_sales FROM customers c INNER JOIN orders o ON c.id = o.customer_id GROUP BY c.name;",
"outputs": [
{
"expected": "SELECT c.name, SUM ( o.amount ) FROM customers c INNER JOIN orders o ON c.id = o.customer_id GROUP BY c.name",
"statement_metadata": {
"size": 25,
"tables": ["customers", "orders"],
"commands": ["SELECT", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT SYSTEM_USER AS CurrentSystemUser, USER_NAME() AS CurrentDatabaseUser, NEWID() AS UniqueIdentifier;",
"outputs": [
{
"expected": "SELECT SYSTEM_USER, USER_NAME ( ), NEWID ( )",
"statement_metadata": {
"size": 6,
"tables": [],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT * FROM (SELECT customer_id, product_id, amount FROM orders) AS SourceTable PIVOT (SUM(amount) FOR product_id IN ([1], [2], [3])) AS PivotTable;",
"outputs": [
{
"expected": "SELECT * FROM ( SELECT customer_id, product_id, amount FROM orders ) PIVOT ( SUM ( amount ) FOR product_id IN ( ? ) )",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT id, TRY_CONVERT(float, total_amount) AS TotalFloat FROM orders WHERE TRY_CONVERT(float, total_amount) IS NOT NULL;",
"outputs": [
{
"expected": "SELECT id, TRY_CONVERT ( float, total_amount ) FROM orders WHERE TRY_CONVERT ( float, total_amount ) IS NOT ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,24 @@
{
"input": "WITH RankedOrders AS (SELECT o.id, o.customer_id, RANK() OVER (PARTITION BY o.customer_id ORDER BY o.amount DESC) AS rnk FROM orders o) SELECT id FROM RankedOrders WHERE rnk = 1;",
"outputs": [
{
"expected": "WITH RankedOrders AS ( SELECT o.id, o.customer_id, RANK ( ) OVER ( PARTITION BY o.customer_id ORDER BY o.amount DESC ) FROM orders o ) SELECT id FROM RankedOrders WHERE rnk = ?",
"statement_metadata": {
"size": 24,
"tables": ["orders", "RankedOrders"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
},
{
"expected": "WITH RankedOrders AS (SELECT o.id, o.customer_id, RANK() OVER (PARTITION BY o.customer_id ORDER BY o.amount DESC) AS rnk FROM orders o) SELECT id FROM RankedOrders WHERE rnk = ?;",
"normalizer_config": {
"keep_sql_alias": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT id, name FROM customers ORDER BY name OFFSET 10 ROWS FETCH NEXT 5 ROWS ONLY;",
"outputs": [
{
"expected": "SELECT id, name FROM customers ORDER BY name OFFSET ? ROWS FETCH NEXT ? ROWS ONLY",
"statement_metadata": {
"size": 15,
"tables": ["customers"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT STRING_AGG(name, ', ') AS names FROM customers WHERE region = 'East';",
"outputs": [
{
"expected": "SELECT STRING_AGG ( name, ? ) FROM customers WHERE region = ?",
"statement_metadata": {
"size": 15,
"tables": ["customers"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT * FROM customers TABLESAMPLE (10 PERCENT);",
"outputs": [
{
"expected": "SELECT * FROM customers TABLESAMPLE ( ? PERCENT )",
"statement_metadata": {
"size": 15,
"tables": ["customers"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,24 @@
{
"input": "SELECT id, amount, ROW_NUMBER() OVER (ORDER BY amount DESC) AS rownum FROM orders;",
"outputs": [
{
"expected": "SELECT id, amount, ROW_NUMBER ( ) OVER ( ORDER BY amount DESC ) AS rownum FROM orders",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
},
{
"expected": "SELECT id, amount, ROW_NUMBER() OVER (ORDER BY amount DESC) AS rownum FROM orders;",
"normalizer_config": {
"keep_sql_alias": true,
"keep_trailing_semicolon": true,
"remove_space_between_parentheses": true
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "CREATE MESSAGE TYPE RequestMessage VALIDATION = WELL_FORMED_XML; CREATE CONTRACT RequestContract (RequestMessage SENT BY INITIATOR);",
"outputs": [
{
"expected": "CREATE MESSAGE TYPE RequestMessage VALIDATION = WELL_FORMED_XML; CREATE CONTRACT RequestContract ( RequestMessage SENT BY INITIATOR )",
"statement_metadata": {
"size": 6,
"tables": [],
"commands": ["CREATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT geography::Point(latitude, longitude, 4326).ToString() FROM locations;",
"outputs": [
{
"expected": "SELECT geography :: Point ( latitude, longitude, ? ) . ToString ( ) FROM locations",
"statement_metadata": {
"size": 15,
"tables": ["locations"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT xmlData.value('(/Customer/Name)[1]', 'nvarchar(100)') AS CustomerName FROM customerRecords;",
"outputs": [
{
"expected": "SELECT xmlData.value ( ? ) FROM customerRecords",
"statement_metadata": {
"size": 21,
"tables": ["customerRecords"],
"commands": ["SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET status = CASE WHEN amount >= 1000 THEN 'High Value' ELSE 'Regular' END;",
"outputs": [
{
"expected": "UPDATE orders SET status = CASE WHEN amount >= ? THEN ? ELSE ? END",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET status = 'Processed' WHERE order_date < GETDATE() - 30;",
"outputs": [
{
"expected": "UPDATE orders SET status = ? WHERE order_date < GETDATE ( ) - ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET status = 'Review Needed' WHERE customer_id IN (SELECT id FROM customers WHERE last_order_date < GETDATE() - 365) AND status = 'Pending';",
"outputs": [
{
"expected": "UPDATE orders SET status = ? WHERE customer_id IN ( SELECT id FROM customers WHERE last_order_date < GETDATE ( ) - ? ) AND status = ?",
"statement_metadata": {
"size": 27,
"tables": ["orders", "customers"],
"commands": ["UPDATE", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE o SET o.status = 'Completed' FROM orders o WHERE o.order_date > '2023-01-01' AND o.amount > 500;",
"outputs": [
{
"expected": "UPDATE o SET o.status = ? FROM orders o WHERE o.order_date > ? AND o.amount > ?",
"statement_metadata": {
"size": 13,
"tables": ["o", "orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE TOP (5) o SET o.status = 'Pending Review' FROM orders o INNER JOIN customers c ON o.customer_id = c.id WHERE c.region = 'North';",
"outputs": [
{
"expected": "UPDATE TOP ( ? ) o SET o.status = ? FROM orders o INNER JOIN customers c ON o.customer_id = c.id WHERE c.region = ?",
"statement_metadata": {
"size": 25,
"tables": ["orders", "customers"],
"commands": ["UPDATE", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders WITH (ROWLOCK) SET status = 'Processing' WHERE status = 'Pending';",
"outputs": [
{
"expected": "UPDATE orders WITH ( ROWLOCK ) SET status = ? WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE [orders] SET [status] = 'Confirmed' WHERE [order_date] >= '2023-01-01';",
"outputs": [
{
"expected": "UPDATE orders SET status = ? WHERE order_date >= ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE TOP (10) orders SET status = 'Reviewed' WHERE status = 'Pending';",
"outputs": [
{
"expected": "UPDATE TOP ( ? ) orders SET status = ? WHERE status = ?",
"statement_metadata": {
"size": 6,
"tables": [],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DECLARE @maxDate DATETIME; SET @maxDate = (SELECT MAX(order_date) FROM orders); UPDATE orders SET status = 'Old Order' WHERE order_date < @maxDate;",
"outputs": [
{
"expected": "DECLARE @maxDate DATETIME; SET @maxDate = ( SELECT MAX ( order_date ) FROM orders ); UPDATE orders SET status = ? WHERE order_date < @maxDate",
"statement_metadata": {
"size": 18,
"tables": ["orders"],
"commands": ["SELECT", "UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET is_priority = CASE WHEN total_amount > 1000 THEN 1 ELSE 0 END WHERE order_date > '2023-01-01';",
"outputs": [
{
"expected": "UPDATE orders SET is_priority = CASE WHEN total_amount > ? THEN ? ELSE ? END WHERE order_date > ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET status = CASE WHEN amount > 1000 THEN 'High Value' ELSE 'Standard' END WHERE order_date >= '2023-01-01';",
"outputs": [
{
"expected": "UPDATE orders SET status = CASE WHEN amount > ? THEN ? ELSE ? END WHERE order_date >= ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "WITH UpdatedOrders AS (SELECT id FROM orders WHERE order_date < GETDATE() - 30) UPDATE o SET o.status = 'Archived' FROM orders o JOIN UpdatedOrders uo ON o.id = uo.id;",
"outputs": [
{
"expected": "WITH UpdatedOrders AS ( SELECT id FROM orders WHERE order_date < GETDATE ( ) - ? ) UPDATE o SET o.status = ? FROM orders o JOIN UpdatedOrders uo ON o.id = uo.id",
"statement_metadata": {
"size": 36,
"tables": ["orders", "o", "UpdatedOrders"],
"commands": ["SELECT", "UPDATE", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET due_date = DATEADD(day, 10, order_date) WHERE status = 'Pending';",
"outputs": [
{
"expected": "UPDATE orders SET due_date = DATEADD ( day, ?, order_date ) WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE o SET o.status = 'Dispatched' FROM orders o INNER JOIN customers c ON o.customer_id = c.id WHERE c.region = 'West' AND o.status = 'Processed';",
"outputs": [
{
"expected": "UPDATE o SET o.status = ? FROM orders o INNER JOIN customers c ON o.customer_id = c.id WHERE c.region = ? AND o.status = ?",
"statement_metadata": {
"size": 26,
"tables": ["o", "orders", "customers"],
"commands": ["UPDATE", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DECLARE @status NVARCHAR(50); SET @status = 'Delayed'; UPDATE orders SET status = @status WHERE order_date < GETDATE() - 60;",
"outputs": [
{
"expected": "DECLARE @status NVARCHAR ( ? ); SET @status = ?; UPDATE orders SET status = @status WHERE order_date < GETDATE ( ) - ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET delivery_date = NULLIF(order_date, due_date) WHERE status = 'Cancelled';",
"outputs": [
{
"expected": "UPDATE orders SET delivery_date = NULLIF ( order_date, due_date ) WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET total_amount = quantity * unit_price WHERE status = 'Pending';",
"outputs": [
{
"expected": "UPDATE orders SET total_amount = quantity * unit_price WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET status = 'Cancelled' OUTPUT deleted.id, deleted.status WHERE status = 'Pending' AND order_date < GETDATE() - 90;",
"outputs": [
{
"expected": "UPDATE orders SET status = ? OUTPUT deleted.id, deleted.status WHERE status = ? AND order_date < GETDATE ( ) - ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET notes = CONCAT(notes, ' Updated on ', CONVERT(VARCHAR, GETDATE(), 101)) WHERE status = 'Shipped';",
"outputs": [
{
"expected": "UPDATE orders SET notes = CONCAT ( notes, ?, CONVERT ( VARCHAR, GETDATE ( ), ? ) ) WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["UPDATE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "UPDATE orders SET status = 'High Priority' WHERE id IN (SELECT order_id FROM order_details WHERE quantity > 10);",
"outputs": [
{
"expected": "UPDATE orders SET status = ? WHERE id IN ( SELECT order_id FROM order_details WHERE quantity > ? )",
"statement_metadata": {
"size": 31,
"tables": ["orders", "order_details"],
"commands": ["UPDATE", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT a.*, b.totalAmount, CASE WHEN c.id IS NOT NULL THEN d.description ELSE 'N/A' END AS description\n-- Joining table a with b to get total amounts. If c.id is not null, get description from d\nFROM (SELECT id, name, status, customer_id\n FROM orders\n WHERE order_date > DATE_ADD(CURDATE(), INTERVAL -6 MONTH)\n AND status IN ('Pending', 'Completed')\n AND customer_id IN (SELECT customer_id FROM customers WHERE region IN ('East', 'West') AND last_order_date > DATE_ADD(CURDATE(), INTERVAL -1 YEAR))\n ORDER BY name DESC) a\nINNER JOIN (SELECT order_id, SUM(amount) AS totalAmount FROM order_details GROUP BY order_id) b ON a.id = b.order_id\nLEFT JOIN audit_log c ON a.id = c.order_id\nLEFT JOIN (SELECT DISTINCT status, description FROM status_descriptions) d ON a.status = d.status\nWHERE a.name LIKE '%test%'\n-- Filtering on name containing 'test'\nAND (b.totalAmount > 1000 OR b.totalAmount IS NULL)\nORDER BY a.order_date DESC, a.name;",
"outputs": [
{
"expected": "SELECT a. *, b.totalAmount, CASE WHEN c.id IS NOT ? THEN d.description ELSE ? END FROM ( SELECT id, name, status, customer_id FROM orders WHERE order_date > DATE_ADD ( CURDATE ( ), INTERVAL ? MONTH ) AND status IN ( ? ) AND customer_id IN ( SELECT customer_id FROM customers WHERE region IN ( ? ) AND last_order_date > DATE_ADD ( CURDATE ( ), INTERVAL ? YEAR ) ) ORDER BY name DESC ) a INNER JOIN ( SELECT order_id, SUM ( amount ) FROM order_details GROUP BY order_id ) b ON a.id = b.order_id LEFT JOIN audit_log c ON a.id = c.order_id LEFT JOIN ( SELECT DISTINCT status, description FROM status_descriptions ) d ON a.status = d.status WHERE a.name LIKE ? AND ( b.totalAmount > ? OR b.totalAmount IS ? ) ORDER BY a.order_date DESC, a.name",
"statement_metadata": {
"size": 195,
"tables": ["orders", "customers", "order_details", "audit_log", "status_descriptions"],
"commands": ["SELECT", "JOIN"],
"comments": ["-- Joining table a with b to get total amounts. If c.id is not null, get description from d", "-- Filtering on name containing 'test'"],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "SELECT a.id, a.name, IFNULL(b.totalAmount, 0) AS totalAmount, c.comment, d.productCount, e.latestOrderDate\n-- Extremely complex query combining multiple joins, subqueries, and inline views\nFROM (SELECT id, name FROM customers WHERE status = 'Active') a\nJOIN (SELECT customer_id, SUM(amount) AS totalAmount FROM orders GROUP BY customer_id) b ON a.id = b.customer_id\nLEFT JOIN (SELECT customer_id, comment FROM customer_feedback WHERE rating = 5 ORDER BY feedback_date DESC LIMIT 1) c ON a.id = c.customer_id\nLEFT JOIN (SELECT customer_id, COUNT(*) AS productCount FROM order_details GROUP BY customer_id) d ON a.id = d.customer_id\nLEFT JOIN (SELECT customer_id, MAX(order_date) AS latestOrderDate FROM orders WHERE status IN ('Completed', 'Shipped') GROUP BY customer_id) e ON a.id = e.customer_id\nWHERE a.name LIKE '%Corp%' AND (b.totalAmount > 1000 OR d.productCount > 5)\nORDER BY a.name, totalAmount DESC;",
"outputs": [
{
"expected": "SELECT a.id, a.name, IFNULL ( b.totalAmount, ? ), c.comment, d.productCount, e.latestOrderDate FROM ( SELECT id, name FROM customers WHERE status = ? ) a JOIN ( SELECT customer_id, SUM ( amount ) FROM orders GROUP BY customer_id ) b ON a.id = b.customer_id LEFT JOIN ( SELECT customer_id, comment FROM customer_feedback WHERE rating = ? ORDER BY feedback_date DESC LIMIT ? ) c ON a.id = c.customer_id LEFT JOIN ( SELECT customer_id, COUNT ( * ) FROM order_details GROUP BY customer_id ) d ON a.id = d.customer_id LEFT JOIN ( SELECT customer_id, MAX ( order_date ) FROM orders WHERE status IN ( ? ) GROUP BY customer_id ) e ON a.id = e.customer_id WHERE a.name LIKE ? AND ( b.totalAmount > ? OR d.productCount > ? ) ORDER BY a.name, totalAmount DESC",
"statement_metadata": {
"size": 136,
"tables": ["customers", "orders", "customer_feedback", "order_details"],
"commands": ["SELECT", "JOIN"],
"comments": ["-- Extremely complex query combining multiple joins, subqueries, and inline views"],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,15 @@
{
"input": "SELECT t1.id, t1.status, t3.totalAmount, t4.commentsCount, CASE WHEN t5.latestCommentDate IS NOT NULL THEN t5.latestCommentDate ELSE 'No Comments' END AS latestComment\n-- Complex query joining multiple subqueries and using conditional logic\nFROM (SELECT id, status FROM orders WHERE customer_id IN (SELECT id FROM customers WHERE region = 'North') AND order_date > (SELECT MAX(order_date) FROM orders WHERE status = 'Pending')) t1\nJOIN (SELECT order_id, SUM(amount) AS totalAmount FROM order_details WHERE product_id IN (SELECT id FROM products WHERE name LIKE '%Premium%') GROUP BY order_id) t3 ON t1.id = t3.order_id\nLEFT JOIN (SELECT order_id, COUNT(*) AS commentsCount FROM order_comments GROUP BY order_id) t4 ON t1.id = t4.order_id\nLEFT JOIN (SELECT order_id, MAX(comment_date) AS latestCommentDate FROM order_comments WHERE comment LIKE '%urgent%' GROUP BY order_id) t5 ON t1.id = t5.order_id\nWHERE t1.status NOT IN ('Cancelled', 'Returned') AND (t3.totalAmount > 500 OR t4.commentsCount > 10)\nORDER BY t1.id, latestComment DESC;",
"outputs": [
{
"expected": "SELECT t?.id, t?.status, t?.totalAmount, t?.commentsCount, CASE WHEN t?.latestCommentDate IS NOT ? THEN t?.latestCommentDate ELSE ? END FROM ( SELECT id, status FROM orders WHERE customer_id IN ( SELECT id FROM customers WHERE region = ? ) AND order_date > ( SELECT MAX ( order_date ) FROM orders WHERE status = ? ) ) t? JOIN ( SELECT order_id, SUM ( amount ) FROM order_details WHERE product_id IN ( SELECT id FROM products WHERE name LIKE ? ) GROUP BY order_id ) t? ON t?.id = t?.order_id LEFT JOIN ( SELECT order_id, COUNT ( * ) FROM order_comments GROUP BY order_id ) t? ON t?.id = t?.order_id LEFT JOIN ( SELECT order_id, MAX ( comment_date ) FROM order_comments WHERE comment LIKE ? GROUP BY order_id ) t? ON t?.id = t?.order_id WHERE t?.status NOT IN ( ? ) AND ( t?.totalAmount > ? OR t?.commentsCount > ? ) ORDER BY t?.id, latestComment DESC",
"statement_metadata": {
"size": 132,
"tables": ["orders", "customers", "order_details", "products", "order_comments"],
"commands": ["SELECT", "JOIN"],
"comments": ["-- Complex query joining multiple subqueries and using conditional logic"],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE status = 'Cancelled';",
"outputs": [
{
"expected": "DELETE FROM orders WHERE status = ?",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM customers WHERE region = 'North'; -- Assuming CASCADE DELETE is set up on the foreign key in the orders table",
"outputs": [
{
"expected": "DELETE FROM customers WHERE region = ?",
"statement_metadata": {
"size": 90,
"tables": ["customers"],
"commands": ["DELETE"],
"comments": ["-- Assuming CASCADE DELETE is set up on the foreign key in the orders table"],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM customers WHERE id = 1; -- Assumes a trigger exists for cascading delete to orders",
"outputs": [
{
"expected": "DELETE FROM customers WHERE id = ?",
"statement_metadata": {
"size": 73,
"tables": ["customers"],
"commands": ["DELETE"],
"comments": ["-- Assumes a trigger exists for cascading delete to orders"],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE status = IF(DAYOFWEEK(CURDATE()) = 1, 'Pending', 'Completed');",
"outputs": [
{
"expected": "DELETE FROM orders WHERE status = IF ( DAYOFWEEK ( CURDATE ( ) ) = ?, ?, ? )",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE id IN (SELECT order_id FROM order_details WHERE quantity = 0);",
"outputs": [
{
"expected": "DELETE FROM orders WHERE id IN ( SELECT order_id FROM order_details WHERE quantity = ? )",
"statement_metadata": {
"size": 31,
"tables": ["orders", "order_details"],
"commands": ["DELETE", "SELECT"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE FROM orders WHERE order_date < '2020-01-01'; OPTIMIZE TABLE orders;",
"outputs": [
{
"expected": "DELETE FROM orders WHERE order_date < ?; OPTIMIZE TABLE orders",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE o FROM orders o JOIN customers c ON o.customer_id = c.id WHERE o.status = 'Completed' AND c.region = 'South';",
"outputs": [
{
"expected": "DELETE o FROM orders o JOIN customers c ON o.customer_id = c.id WHERE o.status = ? AND c.region = ?",
"statement_metadata": {
"size": 25,
"tables": ["orders", "customers"],
"commands": ["DELETE", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "LOCK TABLES orders WRITE; DELETE FROM orders WHERE status = 'Failed'; UNLOCK TABLES;",
"outputs": [
{
"expected": "LOCK TABLES orders WRITE; DELETE FROM orders WHERE status = ?; UNLOCK TABLES",
"statement_metadata": {
"size": 12,
"tables": ["orders"],
"commands": ["DELETE"],
"comments": [],
"procedures": []
}
}
]
}

View File

@@ -0,0 +1,16 @@
{
"input": "DELETE orders, order_details FROM orders INNER JOIN order_details ON orders.id = order_details.order_id WHERE orders.status = 'Obsolete';",
"outputs": [
{
"expected": "DELETE orders, order_details FROM orders INNER JOIN order_details ON orders.id = order_details.order_id WHERE orders.status = ?",
"statement_metadata": {
"size": 29,
"tables": ["orders", "order_details"],
"commands": ["DELETE", "JOIN"],
"comments": [],
"procedures": []
}
}
]
}

Some files were not shown because too many files have changed in this diff Show More