fix: fix dns on linux (#336)

* fix: fix dns on linux

* feat: detect whether running in a GitHub Action to set up DNS
Author: naison
Date: 2024-10-09 19:17:50 +08:00
Committed by: GitHub
Parent: e2757d3916
Commit: d141ec869b
912 changed files with 144260 additions and 5039 deletions

File diff suppressed because it is too large


@@ -0,0 +1,105 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package jsontext implements syntactic processing of JSON
// as specified in RFC 4627, RFC 7159, RFC 7493, RFC 8259, and RFC 8785.
// JSON is a simple data interchange format that can represent
// primitive data types such as booleans, strings, and numbers,
// in addition to structured data types such as objects and arrays.
//
// The [Encoder] and [Decoder] types are used to encode or decode
// a stream of JSON tokens or values.
//
// # Tokens and Values
//
// A JSON token refers to the basic structural elements of JSON:
//
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a start or end delimiter for a JSON object (i.e., '{' or '}')
// - a start or end delimiter for a JSON array (i.e., '[' or ']')
//
// A JSON token is represented by the [Token] type in Go. Technically,
// there are two additional structural characters (i.e., ':' and ','),
// but there is no [Token] representation for them since their presence
// can be inferred by the structure of the JSON grammar itself.
// For example, there must always be an implicit colon between
// the name and value of a JSON object member.
//
// A JSON value refers to a complete unit of JSON data:
//
// - a JSON literal, string, or number
// - a JSON object (e.g., `{"name":"value"}`)
// - a JSON array (e.g., `[1,2,3]`)
//
// A JSON value is represented by the [Value] type in Go and is a []byte
// containing the raw textual representation of the value. There is some overlap
// between tokens and values as both contain literals, strings, and numbers.
// However, only a value can represent the entirety of a JSON object or array.
//
// The [Encoder] and [Decoder] types contain methods to read or write the next
// [Token] or [Value] in a sequence. They maintain a state machine to validate
// whether the sequence of JSON tokens and/or values produces valid JSON.
// [Options] may be passed to the [NewEncoder] or [NewDecoder] constructors
// to configure the syntactic behavior of encoding and decoding.
//
// # Terminology
//
// The terms "encode" and "decode" are used for syntactic functionality
// that is concerned with processing JSON based on its grammar, and
// the terms "marshal" and "unmarshal" are used for semantic functionality
// that determines the meaning of JSON values as Go values and vice-versa.
// This package (i.e., [jsontext]) deals with JSON at a syntactic layer,
// while [encoding/json/v2] deals with JSON at a semantic layer.
// The goal is to provide a clear distinction between functionality that
// is purely concerned with encoding versus that of marshaling.
// For example, one can directly encode a stream of JSON tokens without
// needing to marshal a concrete Go value representing them.
// Similarly, one can decode a stream of JSON tokens without
// needing to unmarshal them into a concrete Go value.
//
// This package uses JSON terminology when discussing JSON, which may differ
// from related concepts in Go or elsewhere in computing literature.
//
// - a JSON "object" refers to an unordered collection of name/value members.
// - a JSON "array" refers to an ordered sequence of elements.
// - a JSON "value" refers to either a literal (i.e., null, false, or true),
// string, number, object, or array.
//
// See RFC 8259 for more information.
//
// # Specifications
//
// Relevant specifications include RFC 4627, RFC 7159, RFC 7493, RFC 8259,
// and RFC 8785. Each RFC is generally a stricter subset of another RFC.
// In increasing order of strictness:
//
// - RFC 4627 and RFC 7159 do not require (but recommend) the use of UTF-8
// and also do not require (but recommend) that object names be unique.
// - RFC 8259 requires the use of UTF-8,
// but does not require (but recommends) that object names be unique.
// - RFC 7493 requires the use of UTF-8
// and also requires that object names be unique.
// - RFC 8785 defines a canonical representation. It requires the use of UTF-8
// and also requires that object names be unique and in a specific ordering.
// It specifies exactly how strings and numbers must be formatted.
//
// The primary difference between RFC 4627 and RFC 7159 is that the former
// restricted top-level values to only JSON objects and arrays, while
// RFC 7159 and subsequent RFCs permit top-level values to additionally be
// JSON nulls, booleans, strings, or numbers.
//
// By default, this package operates on RFC 7493, but can be configured
// to operate according to the other RFC specifications.
// RFC 7493 is a stricter subset of RFC 8259 and fully compliant with it.
// In particular, it makes specific choices about behavior that RFC 8259
// leaves as undefined in order to ensure greater interoperability.
package jsontext
// requireKeyedLiterals can be embedded in a struct to require keyed literals.
type requireKeyedLiterals struct{}
// nonComparable can be embedded in a struct to prevent comparability.
type nonComparable [0]func()
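
To ground the token/value distinction described in the package doc above, here is a minimal sketch of driving an Encoder with both tokens and raw values. It assumes the import path github.com/go-json-experiment/json/jsontext, which this diff does not state explicitly:

package main

import (
	"os"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(os.Stdout)
	_ = e.WriteToken(jsontext.ObjectStart)      // token: '{'
	_ = e.WriteToken(jsontext.String("name"))   // token: object name
	_ = e.WriteValue(jsontext.Value(`"value"`)) // value: a complete JSON string
	_ = e.WriteToken(jsontext.ObjectEnd)        // token: '}'
	// Prints {"name":"value"} terminated by a newline.
}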


@@ -0,0 +1,900 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"bytes"
"io"
"math/bits"
"github.com/go-json-experiment/json/internal/jsonflags"
"github.com/go-json-experiment/json/internal/jsonopts"
"github.com/go-json-experiment/json/internal/jsonwire"
)
// Encoder is a streaming encoder from raw JSON tokens and values.
// It is used to write a stream of top-level JSON values,
// each terminated with a newline character.
//
// [Encoder.WriteToken] and [Encoder.WriteValue] calls may be interleaved.
// For example, the following JSON value:
//
// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
//
// can be composed with the following calls (ignoring errors for brevity):
//
// e.WriteToken(ObjectStart) // {
// e.WriteToken(String("name")) // "name"
// e.WriteToken(String("value")) // "value"
// e.WriteValue(Value(`"array"`)) // "array"
// e.WriteToken(ArrayStart) // [
// e.WriteToken(Null) // null
// e.WriteToken(False) // false
// e.WriteValue(Value("true")) // true
// e.WriteToken(Float(3.14159)) // 3.14159
// e.WriteToken(ArrayEnd) // ]
// e.WriteValue(Value(`"object"`)) // "object"
// e.WriteValue(Value(`{"k":"v"}`)) // {"k":"v"}
// e.WriteToken(ObjectEnd) // }
//
// The above is one of many possible sequences of calls and
// may not represent the most sensible method to call for any given token/value.
// For example, it is probably more common to call [Encoder.WriteToken] with a string
// for object names.
type Encoder struct {
s encoderState
}
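
The call sequence from the doc comment above, assembled into a runnable sketch (errors ignored for brevity, as in the comment; the import path is assumed):

package main

import (
	"os"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(os.Stdout)
	e.WriteToken(jsontext.ObjectStart)        // {
	e.WriteToken(jsontext.String("name"))     // "name"
	e.WriteToken(jsontext.String("value"))    // "value"
	e.WriteValue(jsontext.Value(`"array"`))   // "array"
	e.WriteToken(jsontext.ArrayStart)         // [
	e.WriteToken(jsontext.Null)               // null
	e.WriteToken(jsontext.False)              // false
	e.WriteValue(jsontext.Value("true"))      // true
	e.WriteToken(jsontext.Float(3.14159))     // 3.14159
	e.WriteToken(jsontext.ArrayEnd)           // ]
	e.WriteValue(jsontext.Value(`"object"`))  // "object"
	e.WriteValue(jsontext.Value(`{"k":"v"}`)) // {"k":"v"}
	e.WriteToken(jsontext.ObjectEnd)          // }
	// Prints the value from the doc comment, terminated by a newline:
	// {"name":"value","array":[null,false,true,3.14159],"object":{"k":"v"}}
}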
// encoderState is the low-level state of Encoder.
// It has exported fields and methods for use by the "json" package.
type encoderState struct {
state
encodeBuffer
jsonopts.Struct
SeenPointers map[any]struct{} // only used when marshaling; identical to json.seenPointers
}
// encodeBuffer is a buffer split into 2 segments:
//
// - buf[0:len(buf)] // written (but unflushed) portion of the buffer
// - buf[len(buf):cap(buf)] // unused portion of the buffer
type encodeBuffer struct {
Buf []byte // may alias wr if it is a bytes.Buffer
// baseOffset is added to len(buf) to obtain the absolute offset
// relative to the start of io.Writer stream.
baseOffset int64
wr io.Writer
// maxValue is the approximate maximum Value size passed to WriteValue.
maxValue int
// unusedCache is the buffer returned by the UnusedBuffer method.
unusedCache []byte
// bufStats is statistics about buffer utilization.
// It is only used with pooled encoders in pools.go.
bufStats bufferStatistics
}
// NewEncoder constructs a new streaming encoder writing to w
// configured with the provided options.
// It flushes the internal buffer when the buffer is sufficiently full or
// when a top-level value has been written.
//
// If w is a [bytes.Buffer], then the encoder appends directly into the buffer
// without copying the contents from an intermediate buffer.
func NewEncoder(w io.Writer, opts ...Options) *Encoder {
e := new(Encoder)
e.Reset(w, opts...)
return e
}
// Reset resets an encoder such that it is writing afresh to w and
// configured with the provided options. Reset must not be called on
// an Encoder passed to the [encoding/json/v2.MarshalerV2.MarshalJSONV2] method
// or the [encoding/json/v2.MarshalFuncV2] function.
func (e *Encoder) Reset(w io.Writer, opts ...Options) {
switch {
case e == nil:
panic("jsontext: invalid nil Encoder")
case w == nil:
panic("jsontext: invalid nil io.Writer")
case e.s.Flags.Get(jsonflags.WithinArshalCall):
panic("jsontext: cannot reset Encoder passed to json.MarshalerV2")
}
e.s.reset(nil, w, opts...)
}
func (e *encoderState) reset(b []byte, w io.Writer, opts ...Options) {
e.state.reset()
e.encodeBuffer = encodeBuffer{Buf: b, wr: w, bufStats: e.bufStats}
if bb, ok := w.(*bytes.Buffer); ok && bb != nil {
e.Buf = bb.Bytes()[bb.Len():] // alias the unused buffer of bb
}
e.Struct = jsonopts.Struct{}
e.Struct.Join(opts...)
if e.Flags.Get(jsonflags.Expand) && !e.Flags.Has(jsonflags.Indent) {
e.Indent = "\t"
}
}
// NeedFlush determines whether to flush at this point.
func (e *encoderState) NeedFlush() bool {
// NOTE: This function is carefully written to be inlinable.
// Avoid flushing if e.wr is nil since there is no underlying writer.
// Flush if less than 25% of the capacity remains.
// Flushing at some constant fraction ensures that the buffer stops growing
// so long as the largest Token or Value fits within that unused capacity.
return e.wr != nil && (e.Tokens.Depth() == 1 || len(e.Buf) > 3*cap(e.Buf)/4)
}
// Flush flushes the buffer to the underlying io.Writer.
// It may append a trailing newline after the top-level value.
func (e *encoderState) Flush() error {
if e.wr == nil || e.avoidFlush() {
return nil
}
// In streaming mode, always emit a newline after the top-level value.
if e.Tokens.Depth() == 1 && !e.Flags.Get(jsonflags.OmitTopLevelNewline) {
e.Buf = append(e.Buf, '\n')
}
// Inform objectNameStack that we are about to flush the buffer content.
e.Names.copyQuotedBuffer(e.Buf)
// Specialize bytes.Buffer for better performance.
if bb, ok := e.wr.(*bytes.Buffer); ok {
// If e.buf already aliases the internal buffer of bb,
// then the Write call simply increments the internal offset,
// otherwise Write operates as expected.
// See https://go.dev/issue/42986.
n, _ := bb.Write(e.Buf) // never fails unless bb is nil
e.baseOffset += int64(n)
// If the internal buffer of bytes.Buffer is too small,
// append operations elsewhere in the Encoder may grow the buffer.
// This would be semantically correct, but hurts performance.
// As such, ensure 25% of the current length is always available
// to reduce the probability that other appends must allocate.
if avail := bb.Available(); avail < bb.Len()/4 {
bb.Grow(avail + 1)
}
e.Buf = bb.AvailableBuffer()
return nil
}
// Flush the internal buffer to the underlying io.Writer.
n, err := e.wr.Write(e.Buf)
e.baseOffset += int64(n)
if err != nil {
// In the event of an error, preserve the unflushed portion.
// Thus, write errors aren't fatal so long as the io.Writer
// maintains consistent state after errors.
if n > 0 {
e.Buf = e.Buf[:copy(e.Buf, e.Buf[n:])]
}
return &ioError{action: "write", err: err}
}
e.Buf = e.Buf[:0]
// Check whether to grow the buffer.
// Note that cap(e.buf) may already exceed maxBufferSize since
// an append elsewhere already grew it to store a large token.
const maxBufferSize = 4 << 10
const growthSizeFactor = 2 // higher value is faster
const growthRateFactor = 2 // higher value is slower
// By default, grow if below the maximum buffer size.
grow := cap(e.Buf) <= maxBufferSize/growthSizeFactor
// Growing can be expensive, so only grow
// if a sufficient number of bytes have been processed.
grow = grow && int64(cap(e.Buf)) < e.previousOffsetEnd()/growthRateFactor
if grow {
e.Buf = make([]byte, 0, cap(e.Buf)*growthSizeFactor)
}
return nil
}
// injectSyntacticErrorWithPosition wraps a SyntacticError with the position,
// otherwise it returns the error as is.
// It takes a position relative to the start of e.buf.
func (e *encodeBuffer) injectSyntacticErrorWithPosition(err error, pos int) error {
if serr, ok := err.(*SyntacticError); ok {
return serr.withOffset(e.baseOffset + int64(pos))
}
return err
}
func (e *encodeBuffer) previousOffsetEnd() int64 { return e.baseOffset + int64(len(e.Buf)) }
func (e *encodeBuffer) unflushedBuffer() []byte { return e.Buf }
// avoidFlush indicates whether to avoid flushing to ensure there is always
// enough in the buffer to unwrite the last object member if it were empty.
func (e *encoderState) avoidFlush() bool {
switch {
case e.Tokens.Last.Length() == 0:
// Never flush after ObjectStart or ArrayStart since we don't know yet
// if the object or array will end up being empty.
return true
case e.Tokens.Last.needObjectValue():
// Never flush before the object value since we don't know yet
// if the object value will end up being empty.
return true
case e.Tokens.Last.NeedObjectName() && len(e.Buf) >= 2:
// Never flush after the object value if it does turn out to be empty.
switch string(e.Buf[len(e.Buf)-2:]) {
case `ll`, `""`, `{}`, `[]`: // last two bytes of every empty value
return true
}
}
return false
}
// UnwriteEmptyObjectMember unwrites the last object member if it is empty
// and reports whether it performed an unwrite operation.
func (e *encoderState) UnwriteEmptyObjectMember(prevName *string) bool {
if last := e.Tokens.Last; !last.isObject() || !last.NeedObjectName() || last.Length() == 0 {
panic("BUG: must be called on an object after writing a value")
}
// The flushing logic is modified to never flush a trailing empty value.
// The encoder never writes trailing whitespace eagerly.
b := e.unflushedBuffer()
// Detect whether the last value was empty.
var n int
if len(b) >= 3 {
switch string(b[len(b)-2:]) {
case "ll": // last two bytes of `null`
n = len(`null`)
case `""`:
// It is possible for a non-empty string to have `""` as a suffix
// if the second to the last quote was escaped.
if b[len(b)-3] == '\\' {
return false // e.g., `"\""` is not empty
}
n = len(`""`)
case `{}`:
n = len(`{}`)
case `[]`:
n = len(`[]`)
}
}
if n == 0 {
return false
}
// Unwrite the value, whitespace, colon, name, whitespace, and comma.
b = b[:len(b)-n]
b = jsonwire.TrimSuffixWhitespace(b)
b = jsonwire.TrimSuffixByte(b, ':')
b = jsonwire.TrimSuffixString(b)
b = jsonwire.TrimSuffixWhitespace(b)
b = jsonwire.TrimSuffixByte(b, ',')
e.Buf = b // store back truncated unflushed buffer
// Undo state changes.
e.Tokens.Last.decrement() // for object member value
e.Tokens.Last.decrement() // for object member name
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if e.Tokens.Last.isActiveNamespace() {
e.Namespaces.Last().removeLast()
}
e.Names.clearLast()
if prevName != nil {
e.Names.copyQuotedBuffer(e.Buf) // required by objectNameStack.replaceLastUnquotedName
e.Names.replaceLastUnquotedName(*prevName)
}
}
return true
}
// UnwriteOnlyObjectMemberName unwrites the only object member name
// and returns the unquoted name.
func (e *encoderState) UnwriteOnlyObjectMemberName() string {
if last := e.Tokens.Last; !last.isObject() || last.Length() != 1 {
panic("BUG: must be called on an object after writing first name")
}
// Unwrite the name and whitespace.
b := jsonwire.TrimSuffixString(e.Buf)
isVerbatim := bytes.IndexByte(e.Buf[len(b):], '\\') < 0
name := string(jsonwire.UnquoteMayCopy(e.Buf[len(b):], isVerbatim))
e.Buf = jsonwire.TrimSuffixWhitespace(b)
// Undo state changes.
e.Tokens.Last.decrement()
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
if e.Tokens.Last.isActiveNamespace() {
e.Namespaces.Last().removeLast()
}
e.Names.clearLast()
}
return name
}
// WriteToken writes the next token and advances the internal write offset.
//
// The provided token kind must be consistent with the JSON grammar.
// For example, it is an error to provide a number when the encoder
// is expecting an object name (which is always a string), or
// to provide an end object delimiter when the encoder is finishing an array.
// If the provided token is invalid, then it reports a [SyntacticError] and
// the internal state remains unchanged. The offset reported
// in [SyntacticError] will be relative to the [Encoder.OutputOffset].
func (e *Encoder) WriteToken(t Token) error {
return e.s.WriteToken(t)
}
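
As a sketch of the behavior described above, providing a token that violates the grammar reports an error while leaving the encoder usable; the expected output is an inference from the doc comment and the Float/String token constructors shown elsewhere in this diff:

package main

import (
	"fmt"
	"os"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(os.Stdout)
	_ = e.WriteToken(jsontext.ObjectStart)
	// A number is not a valid object name, so this reports a SyntacticError
	// and the internal encoder state remains unchanged.
	if err := e.WriteToken(jsontext.Float(3.14)); err != nil {
		fmt.Fprintln(os.Stderr, "rejected:", err)
	}
	_ = e.WriteToken(jsontext.String("pi")) // a string name is still accepted
	_ = e.WriteToken(jsontext.Float(3.14))
	_ = e.WriteToken(jsontext.ObjectEnd) // prints {"pi":3.14} plus a newline
}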
func (e *encoderState) WriteToken(t Token) error {
k := t.Kind()
b := e.Buf // use local variable to avoid mutating e in case of error
// Append any delimiters or optional whitespace.
b = e.Tokens.MayAppendDelim(b, k)
if e.Flags.Get(jsonflags.Expand) {
b = e.appendWhitespace(b, k)
}
pos := len(b) // offset before the token
// Append the token to the output and to the state machine.
var err error
switch k {
case 'n':
b = append(b, "null"...)
err = e.Tokens.appendLiteral()
case 'f':
b = append(b, "false"...)
err = e.Tokens.appendLiteral()
case 't':
b = append(b, "true"...)
err = e.Tokens.appendLiteral()
case '"':
if b, err = t.appendString(b, &e.Flags); err != nil {
break
}
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() {
if !e.Tokens.Last.isValidNamespace() {
err = errInvalidNamespace
break
}
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
err = newDuplicateNameError(b[pos:])
break
}
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
}
err = e.Tokens.appendString()
case '0':
if b, err = t.appendNumber(b, e.Flags.Get(jsonflags.CanonicalizeNumbers)); err != nil {
break
}
err = e.Tokens.appendNumber()
case '{':
b = append(b, '{')
if err = e.Tokens.pushObject(); err != nil {
break
}
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
e.Names.push()
e.Namespaces.push()
}
case '}':
b = append(b, '}')
if err = e.Tokens.popObject(); err != nil {
break
}
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
e.Names.pop()
e.Namespaces.pop()
}
case '[':
b = append(b, '[')
err = e.Tokens.pushArray()
case ']':
b = append(b, ']')
err = e.Tokens.popArray()
default:
err = &SyntacticError{str: "invalid json.Token"}
}
if err != nil {
return e.injectSyntacticErrorWithPosition(err, pos)
}
// Finish off the buffer and store it back into e.
e.Buf = b
if e.NeedFlush() {
return e.Flush()
}
return nil
}
// AppendRaw appends either a raw string (without double quotes) or number.
// Specify safeASCII if the string output is guaranteed to be ASCII
// without any characters (including '<', '>', and '&') that need escaping,
// otherwise this will validate whether the string needs escaping.
// The appended bytes for a JSON number must be valid.
//
// This is a specialized implementation of Encoder.WriteValue
// that allows appending directly into the buffer.
// It is only called from marshal logic in the "json" package.
func (e *encoderState) AppendRaw(k Kind, safeASCII bool, appendFn func([]byte) ([]byte, error)) error {
b := e.Buf // use local variable to avoid mutating e in case of error
// Append any delimiters or optional whitespace.
b = e.Tokens.MayAppendDelim(b, k)
if e.Flags.Get(jsonflags.Expand) {
b = e.appendWhitespace(b, k)
}
pos := len(b) // offset before the token
var err error
switch k {
case '"':
// Append directly into the encoder buffer by assuming that
// most of the time none of the characters need escaping.
b = append(b, '"')
if b, err = appendFn(b); err != nil {
return err
}
b = append(b, '"')
// Check whether we need to escape the string and if necessary
// copy it to a scratch buffer and then escape it back.
isVerbatim := safeASCII || !jsonwire.NeedEscape(b[pos+len(`"`):len(b)-len(`"`)])
if !isVerbatim {
var err error
b2 := append(e.unusedCache, b[pos+len(`"`):len(b)-len(`"`)]...)
b, err = jsonwire.AppendQuote(b[:pos], string(b2), &e.Flags)
e.unusedCache = b2[:0]
if err != nil {
return e.injectSyntacticErrorWithPosition(err, pos)
}
}
// Update the state machine.
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() {
if !e.Tokens.Last.isValidNamespace() {
return errInvalidNamespace
}
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], isVerbatim) {
err := newDuplicateNameError(b[pos:])
return e.injectSyntacticErrorWithPosition(err, pos)
}
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
}
if err := e.Tokens.appendString(); err != nil {
return e.injectSyntacticErrorWithPosition(err, pos)
}
case '0':
if b, err = appendFn(b); err != nil {
return err
}
if err := e.Tokens.appendNumber(); err != nil {
return e.injectSyntacticErrorWithPosition(err, pos)
}
default:
panic("BUG: invalid kind")
}
// Finish off the buffer and store it back into e.
e.Buf = b
if e.NeedFlush() {
return e.Flush()
}
return nil
}
// WriteValue writes the next raw value and advances the internal write offset.
// The Encoder does not simply copy the provided value verbatim, but
// parses it to ensure that it is syntactically valid and reformats it
// according to how the Encoder is configured to format whitespace and strings.
// If [AllowInvalidUTF8] is specified, then any invalid UTF-8 is mangled
// as the Unicode replacement character, U+FFFD.
//
// The provided value kind must be consistent with the JSON grammar
// (see examples on [Encoder.WriteToken]). If the provided value is invalid,
// then it reports a [SyntacticError] and the internal state remains unchanged.
// The offset reported in [SyntacticError] will be relative to the
// [Encoder.OutputOffset] plus the offset into v of any encountered syntax error.
func (e *Encoder) WriteValue(v Value) error {
return e.s.WriteValue(v)
}
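
A short sketch of the reformatting behavior described above: with default (compact) options, interior whitespace in the provided value is dropped. The output shown is the expected compact form, assuming the standard import path:

package main

import (
	"os"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(os.Stdout)
	// The raw value is validated and rewritten in the encoder's own format.
	if err := e.WriteValue(jsontext.Value(` { "k" : [ 1, 2, 3 ] } `)); err != nil {
		panic(err)
	}
	// Prints {"k":[1,2,3]} terminated by a newline.
}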
func (e *encoderState) WriteValue(v Value) error {
e.maxValue |= len(v) // bitwise OR is a fast approximation of max
k := v.Kind()
b := e.Buf // use local variable to avoid mutating e in case of error
// Append any delimiters or optional whitespace.
b = e.Tokens.MayAppendDelim(b, k)
if e.Flags.Get(jsonflags.Expand) {
b = e.appendWhitespace(b, k)
}
pos := len(b) // offset before the value
// Append the value to the output.
var n int
n += jsonwire.ConsumeWhitespace(v[n:])
b, m, err := e.reformatValue(b, v[n:], e.Tokens.Depth())
if err != nil {
return e.injectSyntacticErrorWithPosition(err, pos+n+m)
}
n += m
n += jsonwire.ConsumeWhitespace(v[n:])
if len(v) > n {
err = newInvalidCharacterError(v[n:], "after top-level value")
return e.injectSyntacticErrorWithPosition(err, pos+n)
}
// Append the kind to the state machine.
switch k {
case 'n', 'f', 't':
err = e.Tokens.appendLiteral()
case '"':
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && e.Tokens.Last.NeedObjectName() {
if !e.Tokens.Last.isValidNamespace() {
err = errInvalidNamespace
break
}
if e.Tokens.Last.isActiveNamespace() && !e.Namespaces.Last().insertQuoted(b[pos:], false) {
err = newDuplicateNameError(b[pos:])
break
}
e.Names.ReplaceLastQuotedOffset(pos) // only replace if insertQuoted succeeds
}
err = e.Tokens.appendString()
case '0':
err = e.Tokens.appendNumber()
case '{':
if err = e.Tokens.pushObject(); err != nil {
break
}
if err = e.Tokens.popObject(); err != nil {
panic("BUG: popObject should never fail immediately after pushObject: " + err.Error())
}
case '[':
if err = e.Tokens.pushArray(); err != nil {
break
}
if err = e.Tokens.popArray(); err != nil {
panic("BUG: popArray should never fail immediately after pushArray: " + err.Error())
}
}
if err != nil {
return e.injectSyntacticErrorWithPosition(err, pos)
}
// Finish off the buffer and store it back into e.
e.Buf = b
if e.NeedFlush() {
return e.Flush()
}
return nil
}
// appendWhitespace appends whitespace that immediately precedes the next token.
func (e *encoderState) appendWhitespace(b []byte, next Kind) []byte {
if e.Tokens.needDelim(next) == ':' {
return append(b, ' ')
} else {
return e.AppendIndent(b, e.Tokens.NeedIndent(next))
}
}
// AppendIndent appends the appropriate number of indentation characters
// for the current nested level, n.
func (e *encoderState) AppendIndent(b []byte, n int) []byte {
if n == 0 {
return b
}
b = append(b, '\n')
b = append(b, e.IndentPrefix...)
for ; n > 1; n-- {
b = append(b, e.Indent...)
}
return b
}
// reformatValue parses a JSON value from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
func (e *encoderState) reformatValue(dst []byte, src Value, depth int) ([]byte, int, error) {
// TODO: Should this update ValueFlags as input?
if len(src) == 0 {
return dst, 0, io.ErrUnexpectedEOF
}
switch k := Kind(src[0]).normalize(); k {
case 'n':
if jsonwire.ConsumeNull(src) == 0 {
n, err := jsonwire.ConsumeLiteral(src, "null")
return dst, n, err
}
return append(dst, "null"...), len("null"), nil
case 'f':
if jsonwire.ConsumeFalse(src) == 0 {
n, err := jsonwire.ConsumeLiteral(src, "false")
return dst, n, err
}
return append(dst, "false"...), len("false"), nil
case 't':
if jsonwire.ConsumeTrue(src) == 0 {
n, err := jsonwire.ConsumeLiteral(src, "true")
return dst, n, err
}
return append(dst, "true"...), len("true"), nil
case '"':
if n := jsonwire.ConsumeSimpleString(src); n > 0 {
dst, src = append(dst, src[:n]...), src[n:] // copy simple strings verbatim
return dst, n, nil
}
return jsonwire.ReformatString(dst, src, &e.Flags)
case '0':
if n := jsonwire.ConsumeSimpleNumber(src); n > 0 && !e.Flags.Get(jsonflags.CanonicalizeNumbers) {
dst, src = append(dst, src[:n]...), src[n:] // copy simple numbers verbatim
return dst, n, nil
}
return jsonwire.ReformatNumber(dst, src, e.Flags.Get(jsonflags.CanonicalizeNumbers))
case '{':
return e.reformatObject(dst, src, depth)
case '[':
return e.reformatArray(dst, src, depth)
default:
return dst, 0, newInvalidCharacterError(src, "at start of value")
}
}
// reformatObject parses a JSON object from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
func (e *encoderState) reformatObject(dst []byte, src Value, depth int) ([]byte, int, error) {
// Append object start.
if len(src) == 0 || src[0] != '{' {
panic("BUG: reformatObject must be called with a buffer that starts with '{'")
} else if depth == maxNestingDepth+1 {
return dst, 0, errMaxDepth
}
dst = append(dst, '{')
n := len("{")
// Append (possible) object end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
if src[n] == '}' {
dst = append(dst, '}')
n += len("}")
return dst, n, nil
}
var err error
var names *objectNamespace
if !e.Flags.Get(jsonflags.AllowDuplicateNames) {
e.Namespaces.push()
defer e.Namespaces.pop()
names = e.Namespaces.Last()
}
depth++
for {
// Append optional newline and indentation.
if e.Flags.Get(jsonflags.Expand) {
dst = e.AppendIndent(dst, depth)
}
// Append object name.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
m := jsonwire.ConsumeSimpleString(src[n:])
if m > 0 {
dst = append(dst, src[n:n+m]...)
} else {
dst, m, err = jsonwire.ReformatString(dst, src[n:], &e.Flags)
if err != nil {
return dst, n + m, err
}
}
// TODO: Specify whether the name is verbatim or not.
if !e.Flags.Get(jsonflags.AllowDuplicateNames) && !names.insertQuoted(src[n:n+m], false) {
return dst, n, newDuplicateNameError(src[n : n+m])
}
n += m
// Append colon.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
if src[n] != ':' {
return dst, n, newInvalidCharacterError(src[n:], "after object name (expecting ':')")
}
dst = append(dst, ':')
n += len(":")
if e.Flags.Get(jsonflags.Expand) {
dst = append(dst, ' ')
}
// Append object value.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
dst, m, err = e.reformatValue(dst, src[n:], depth)
if err != nil {
return dst, n + m, err
}
n += m
// Append comma or object end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
switch src[n] {
case ',':
dst = append(dst, ',')
n += len(",")
continue
case '}':
if e.Flags.Get(jsonflags.Expand) {
dst = e.AppendIndent(dst, depth-1)
}
dst = append(dst, '}')
n += len("}")
return dst, n, nil
default:
return dst, n, newInvalidCharacterError(src[n:], "after object value (expecting ',' or '}')")
}
}
}
// reformatArray parses a JSON array from the start of src and
// appends it to the end of dst, reformatting whitespace and strings as needed.
// It returns the extended dst buffer and the number of consumed input bytes.
func (e *encoderState) reformatArray(dst []byte, src Value, depth int) ([]byte, int, error) {
// Append array start.
if len(src) == 0 || src[0] != '[' {
panic("BUG: reformatArray must be called with a buffer that starts with '['")
} else if depth == maxNestingDepth+1 {
return dst, 0, errMaxDepth
}
dst = append(dst, '[')
n := len("[")
// Append (possible) array end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
if src[n] == ']' {
dst = append(dst, ']')
n += len("]")
return dst, n, nil
}
var err error
depth++
for {
// Append optional newline and indentation.
if e.Flags.Get(jsonflags.Expand) {
dst = e.AppendIndent(dst, depth)
}
// Append array value.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
var m int
dst, m, err = e.reformatValue(dst, src[n:], depth)
if err != nil {
return dst, n + m, err
}
n += m
// Append comma or array end.
n += jsonwire.ConsumeWhitespace(src[n:])
if uint(len(src)) <= uint(n) {
return dst, n, io.ErrUnexpectedEOF
}
switch src[n] {
case ',':
dst = append(dst, ',')
n += len(",")
continue
case ']':
if e.Flags.Get(jsonflags.Expand) {
dst = e.AppendIndent(dst, depth-1)
}
dst = append(dst, ']')
n += len("]")
return dst, n, nil
default:
return dst, n, newInvalidCharacterError(src[n:], "after array value (expecting ',' or ']')")
}
}
}
// OutputOffset returns the current output byte offset. It gives the location
// of the next byte immediately after the most recently written token or value.
// The number of bytes actually written to the underlying [io.Writer] may be less
// than this offset due to internal buffering effects.
func (e *Encoder) OutputOffset() int64 {
return e.s.previousOffsetEnd()
}
// UnusedBuffer returns a zero-length buffer with a possible non-zero capacity.
// This buffer is intended to be used to populate a [Value]
// being passed to an immediately succeeding [Encoder.WriteValue] call.
//
// Example usage:
//
// b := e.UnusedBuffer()
// b = append(b, '"')
// b = appendString(b, v) // append the string formatting of v
// b = append(b, '"')
// ... := e.WriteValue(b)
//
// It is the user's responsibility to ensure that the value is valid JSON.
func (e *Encoder) UnusedBuffer() []byte {
// NOTE: We don't return e.buf[len(e.buf):cap(e.buf)] since WriteValue would
// need to take special care to avoid mangling the data while reformatting.
// WriteValue can't easily identify whether the input Value aliases e.buf
// without using unsafe.Pointer. Thus, we just return a different buffer.
// Should this ever alias e.buf, we need to consider how it operates with
// the specialized performance optimization for bytes.Buffer.
n := 1 << bits.Len(uint(e.s.maxValue|63)) // fast approximation for max length
if cap(e.s.unusedCache) < n {
e.s.unusedCache = make([]byte, 0, n)
}
return e.s.unusedCache
}
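
A runnable version of the UnusedBuffer example in the doc comment above (the string content is arbitrary; the caller must ensure the assembled bytes are valid JSON):

package main

import (
	"os"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(os.Stdout)
	b := e.UnusedBuffer()
	b = append(b, '"')
	b = append(b, "hello, world"...) // no characters that need escaping
	b = append(b, '"')
	if err := e.WriteValue(b); err != nil {
		panic(err)
	}
	// Prints "hello, world" terminated by a newline.
}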
// StackDepth returns the depth of the state machine for written JSON data.
// Each level on the stack represents a nested JSON object or array.
// It is incremented whenever an [ObjectStart] or [ArrayStart] token is encountered
// and decremented whenever an [ObjectEnd] or [ArrayEnd] token is encountered.
// The depth is zero-indexed, where zero represents the top-level JSON value.
func (e *Encoder) StackDepth() int {
// NOTE: Keep in sync with Decoder.StackDepth.
return e.s.Tokens.Depth() - 1
}
// StackIndex returns information about the specified stack level.
// It must be a number between 0 and [Encoder.StackDepth], inclusive.
// For each level, it reports the kind:
//
// - 0 for a level of zero,
// - '{' for a level representing a JSON object, and
// - '[' for a level representing a JSON array.
//
// It also reports the length of that JSON object or array.
// Each name and value in a JSON object is counted separately,
// so the effective number of members would be half the length.
// A complete JSON object must have an even length.
func (e *Encoder) StackIndex(i int) (Kind, int) {
// NOTE: Keep in sync with Decoder.StackIndex.
switch s := e.s.Tokens.index(i); {
case i > 0 && s.isObject():
return '{', s.Length()
case i > 0 && s.isArray():
return '[', s.Length()
default:
return 0, s.Length()
}
}
// StackPointer returns a JSON Pointer (RFC 6901) to the most recently written value.
// Object names are only present if [AllowDuplicateNames] is false, otherwise
// object members are represented using their index within the object.
func (e *Encoder) StackPointer() string {
e.s.Names.copyQuotedBuffer(e.s.Buf)
return string(e.s.appendStackPointer(nil))
}
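
A sketch of the stack introspection methods above. The printed depth and pointer are what the state machine and the stack-pointer logic in this diff should produce, shown here as expectations rather than verified output:

package main

import (
	"fmt"
	"io"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(io.Discard)
	_ = e.WriteToken(jsontext.ObjectStart)
	_ = e.WriteToken(jsontext.String("alpha")) // object member name
	_ = e.WriteToken(jsontext.ArrayStart)
	_ = e.WriteToken(jsontext.Null) // most recently written value
	fmt.Println(e.StackDepth())     // 2: inside an object, then an array
	fmt.Println(e.StackPointer())   // expected: /alpha/0
}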


@@ -0,0 +1,60 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"github.com/go-json-experiment/json/internal/jsonwire"
)
const errorPrefix = "jsontext: "
type ioError struct {
action string // either "read" or "write"
err error
}
func (e *ioError) Error() string {
return errorPrefix + e.action + " error: " + e.err.Error()
}
func (e *ioError) Unwrap() error {
return e.err
}
// SyntacticError is a description of a syntactic error that occurred when
// encoding or decoding JSON according to the grammar.
//
// The contents of this error as produced by this package may change over time.
type SyntacticError struct {
requireKeyedLiterals
nonComparable
// ByteOffset indicates that an error occurred after this byte offset.
ByteOffset int64
str string
}
func (e *SyntacticError) Error() string {
return errorPrefix + e.str
}
func (e *SyntacticError) withOffset(pos int64) error {
return &SyntacticError{ByteOffset: pos, str: e.str}
}
func newDuplicateNameError[Bytes ~[]byte | ~string](quoted Bytes) *SyntacticError {
return &SyntacticError{str: "duplicate name " + string(quoted) + " in object"}
}
func newInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) *SyntacticError {
what := jsonwire.QuoteRune(prefix)
return &SyntacticError{str: "invalid character " + what + " " + where}
}
// TODO: Error types between "json", "jsontext", and "jsonwire" are a mess.
// Clean this up.
func init() {
// Inject behavior in "jsonwire" so that it can produce SyntacticError types.
jsonwire.NewError = func(s string) error { return &SyntacticError{str: s} }
jsonwire.ErrInvalidUTF8 = &SyntacticError{str: jsonwire.ErrInvalidUTF8.Error()}
}
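
Since SyntacticError is exported with a ByteOffset field, callers can inspect it with errors.As. A minimal sketch; the triggering write below is just one way to provoke such an error:

package main

import (
	"errors"
	"fmt"
	"io"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(io.Discard)
	_ = e.WriteToken(jsontext.ObjectStart)
	// An object name must be a string, so writing a number here fails.
	err := e.WriteValue(jsontext.Value(`123`))

	var serr *jsontext.SyntacticError
	if errors.As(err, &serr) {
		fmt.Println(serr.ByteOffset, serr.Error()) // offset into the output plus a description
	}
}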


@@ -0,0 +1,83 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"io"
"github.com/go-json-experiment/json/internal"
)
// Internal is for internal use only.
// This is exempt from the Go compatibility agreement.
var Internal exporter
type exporter struct{}
// Export exposes internal functionality from "jsontext" to "json".
// This cannot be dynamically called by other packages since
// they cannot obtain a reference to the internal.AllowInternalUse value.
func (exporter) Export(p *internal.NotForPublicUse) export {
if p != &internal.AllowInternalUse {
panic("unauthorized call to Export")
}
return export{}
}
// The export type exposes functionality to packages with visibility to
// the internal.AllowInternalUse variable. The "json" package uses this
// to modify low-level state in the Encoder and Decoder types.
// It mutates the state directly instead of calling ReadToken or WriteToken
// since this is more performant. The public APIs need to track state to ensure
// that users are constructing a valid JSON value, but the "json" implementation
// guarantees that it emits valid JSON by the structure of the code itself.
type export struct{}
// Encoder returns a pointer to the underlying encoderState.
func (export) Encoder(e *Encoder) *encoderState { return &e.s }
// Decoder returns a pointer to the underlying decoderState.
func (export) Decoder(d *Decoder) *decoderState { return &d.s }
func (export) GetBufferedEncoder(o ...Options) *Encoder {
return getBufferedEncoder(o...)
}
func (export) PutBufferedEncoder(e *Encoder) {
putBufferedEncoder(e)
}
func (export) GetStreamingEncoder(w io.Writer, o ...Options) *Encoder {
return getStreamingEncoder(w, o...)
}
func (export) PutStreamingEncoder(e *Encoder) {
putStreamingEncoder(e)
}
func (export) GetBufferedDecoder(b []byte, o ...Options) *Decoder {
return getBufferedDecoder(b, o...)
}
func (export) PutBufferedDecoder(d *Decoder) {
putBufferedDecoder(d)
}
func (export) GetStreamingDecoder(r io.Reader, o ...Options) *Decoder {
return getStreamingDecoder(r, o...)
}
func (export) PutStreamingDecoder(d *Decoder) {
putStreamingDecoder(d)
}
func (export) NewDuplicateNameError(quoted []byte, pos int64) error {
return newDuplicateNameError(quoted).withOffset(pos)
}
func (export) NewInvalidCharacterError(prefix, where string, pos int64) error {
return newInvalidCharacterError(prefix, where).withOffset(pos)
}
func (export) NewMissingNameError(pos int64) error {
return errMissingName.withOffset(pos)
}
func (export) NewInvalidUTF8Error(pos int64) error {
return errInvalidUTF8.withOffset(pos)
}


@@ -0,0 +1,178 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"strings"
"github.com/go-json-experiment/json/internal/jsonflags"
"github.com/go-json-experiment/json/internal/jsonopts"
"github.com/go-json-experiment/json/internal/jsonwire"
)
// Options configures [NewEncoder], [Encoder.Reset], [NewDecoder],
// and [Decoder.Reset] with specific features.
// Each function takes in a variadic list of options, where properties
// set in latter options override the value of previously set properties.
//
// The Options type is identical to [encoding/json.Options] and
// [encoding/json/v2.Options]. Options from the other packages may
// be passed to functionality in this package, but are ignored.
// Options from this package may be used with the other packages.
type Options = jsonopts.Options
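
A short sketch of option composition: options are passed variadically and later options override earlier ones, so the WithIndent below replaces the default "\t" indent implied by Expand. The exact multiline layout is an expectation based on the encoder code in this diff:

package main

import (
	"os"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	e := jsontext.NewEncoder(os.Stdout,
		jsontext.Expand(true),     // multiline output, default "\t" indent
		jsontext.WithIndent("  "), // later option wins: indent with two spaces
	)
	_ = e.WriteValue(jsontext.Value(`{"k":[1,2]}`))
	// Expected output:
	// {
	//   "k": [
	//     1,
	//     2
	//   ]
	// }
}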
// AllowDuplicateNames specifies that JSON objects may contain
// duplicate member names. Disabling the duplicate name check may provide
// performance benefits, but breaks compliance with RFC 7493, section 2.3.
// The input or output will still be compliant with RFC 8259,
// which leaves the handling of duplicate names as unspecified behavior.
//
// This affects either encoding or decoding.
func AllowDuplicateNames(v bool) Options {
if v {
return jsonflags.AllowDuplicateNames | 1
} else {
return jsonflags.AllowDuplicateNames | 0
}
}
// AllowInvalidUTF8 specifies that JSON strings may contain invalid UTF-8,
// which will be mangled as the Unicode replacement character, U+FFFD.
// This causes the encoder or decoder to break compliance with
// RFC 7493, section 2.1, and RFC 8259, section 8.1.
//
// This affects either encoding or decoding.
func AllowInvalidUTF8(v bool) Options {
if v {
return jsonflags.AllowInvalidUTF8 | 1
} else {
return jsonflags.AllowInvalidUTF8 | 0
}
}
// EscapeForHTML specifies that '<', '>', and '&' characters within JSON strings
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u003c) so that
// the output is safe to embed within HTML.
//
// This only affects encoding and is ignored when decoding.
func EscapeForHTML(v bool) Options {
if v {
return jsonflags.EscapeForHTML | 1
} else {
return jsonflags.EscapeForHTML | 0
}
}
// EscapeForJS specifies that U+2028 and U+2029 characters within JSON strings
// should be escaped as a hexadecimal Unicode codepoint (e.g., \u2028) so that
// the output is valid to embed within JavaScript. See RFC 8259, section 12.
//
// This only affects encoding and is ignored when decoding.
func EscapeForJS(v bool) Options {
if v {
return jsonflags.EscapeForJS | 1
} else {
return jsonflags.EscapeForJS | 0
}
}
// Expand specifies that the JSON output should be expanded,
// where every JSON object member or JSON array element
// appears on a new, indented line according to the nesting depth.
// If an indent is not already specified, then it defaults to using "\t".
//
// If set to false, then the output is compact,
// where no whitespace is emitted between JSON values.
//
// This only affects encoding and is ignored when decoding.
func Expand(v bool) Options {
if v {
return jsonflags.Expand | 1
} else {
return jsonflags.Expand | 0
}
}
// WithIndent specifies that the encoder should emit multiline output
// where each element in a JSON object or array begins on a new, indented line
// beginning with the indent prefix (see [WithIndentPrefix])
// followed by one or more copies of indent according to the nesting depth.
// The indent must only be composed of space or tab characters.
//
// If the intent is to emit indented output without a preference for
// the particular indent string, then use [Expand] instead.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Expand] being set to true.
func WithIndent(indent string) Options {
// Fast-path: Return a constant for common indents, which avoids allocating.
// These are derived from analyzing the Go module proxy on 2023-07-01.
switch indent {
case "\t":
return jsonopts.Indent("\t") // ~14k usages
case "    ":
return jsonopts.Indent("    ") // ~18k usages
case "   ":
return jsonopts.Indent("   ") // ~1.7k usages
case "  ":
return jsonopts.Indent("  ") // ~52k usages
case " ":
return jsonopts.Indent(" ") // ~12k usages
case "":
return jsonopts.Indent("") // ~1.5k usages
}
// Otherwise, allocate for this unique value.
if s := strings.Trim(indent, " \t"); len(s) > 0 {
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
}
return jsonopts.Indent(indent)
}
// WithIndentPrefix specifies that the encoder should emit multiline output
// where each element in a JSON object or array begins on a new, indented line
// beginning with the indent prefix followed by one or more copies of indent
// (see [WithIndent]) according to the nesting depth.
// The prefix must only be composed of space or tab characters.
//
// This only affects encoding and is ignored when decoding.
// Use of this option implies [Expand] being set to true.
func WithIndentPrefix(prefix string) Options {
if s := strings.Trim(prefix, " \t"); len(s) > 0 {
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix")
}
return jsonopts.IndentPrefix(prefix)
}
/*
// TODO(https://go.dev/issue/56733): Implement WithByteLimit and WithDepthLimit.
// WithByteLimit sets a limit on the number of bytes of input or output bytes
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume/produce
// more than a total of n bytes to make progress on the top-level JSON value,
// then the call will report an error.
// Whitespace before and within the top-level value is counted against the limit.
// Whitespace after a top-level value is counted against the limit
// for the next top-level value.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is no limit at all.
func WithByteLimit(n int64) Options {
return jsonopts.ByteLimit(max(n, 0))
}
// WithDepthLimit sets a limit on the maximum depth of JSON nesting
// that may be consumed or produced for each top-level JSON value.
// If a [Decoder] or [Encoder] method call would need to consume or produce
// a depth greater than n to make progress on the top-level JSON value,
// then the call will report an error.
//
// A non-positive limit is equivalent to no limit at all.
// If unspecified, the default limit is 10000.
func WithDepthLimit(n int) Options {
return jsonopts.DepthLimit(max(n, 0))
}
*/


@@ -0,0 +1,150 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"bytes"
"io"
"math/bits"
"sync"
)
// TODO(https://go.dev/issue/47657): Use sync.PoolOf.
var (
// This owns the internal buffer since there is no io.Writer to output to.
// Since the buffer can get arbitrarily large in normal usage,
// there is statistical tracking logic to determine whether to recycle
// the internal buffer or not based on a history of utilization.
bufferedEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON before flushing it to the underlying io.Writer.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferEncoderPool = &sync.Pool{New: func() any { return new(Encoder) }}
)
// bufferStatistics is statistics to track buffer utilization.
// It is used to determine whether to recycle a buffer or not
// to avoid https://go.dev/issue/23199.
type bufferStatistics struct {
strikes int // number of times the buffer was under-utilized
prevLen int // length of previous buffer
}
func getBufferedEncoder(opts ...Options) *Encoder {
e := bufferedEncoderPool.Get().(*Encoder)
if e.s.Buf == nil {
// Round up to nearest 2ⁿ to make best use of malloc size classes.
// See runtime/sizeclasses.go on Go1.15.
// Logical OR with 63 to ensure 64 as the minimum buffer size.
n := 1 << bits.Len(uint(e.s.bufStats.prevLen|63))
e.s.Buf = make([]byte, 0, n)
}
e.s.reset(e.s.Buf[:0], nil, opts...)
return e
}
func putBufferedEncoder(e *Encoder) {
// Recycle large buffers only if sufficiently utilized.
// If a buffer is under-utilized enough times sequentially,
// then it is discarded, ensuring that a single large buffer
// won't be kept alive by a continuous stream of small usages.
//
// The worst case utilization is computed as:
// MIN_UTILIZATION_THRESHOLD / (1 + MAX_NUM_STRIKES)
//
// For the constants chosen below, this is (25%)/(1+4) ⇒ 5%.
// This may seem low, but it ensures a lower bound on
// the absolute worst-case utilization. Without this check,
// this would be theoretically 0%, which is infinitely worse.
//
// See https://go.dev/issue/27735.
switch {
case cap(e.s.Buf) <= 4<<10: // always recycle buffers smaller than 4KiB
e.s.bufStats.strikes = 0
case cap(e.s.Buf)/4 <= len(e.s.Buf): // at least 25% utilization
e.s.bufStats.strikes = 0
case e.s.bufStats.strikes < 4: // at most 4 strikes
e.s.bufStats.strikes++
default: // discard the buffer; too large and too often under-utilized
e.s.bufStats.strikes = 0
e.s.bufStats.prevLen = len(e.s.Buf) // heuristic for size to allocate next time
e.s.Buf = nil
}
bufferedEncoderPool.Put(e)
}
func getStreamingEncoder(w io.Writer, opts ...Options) *Encoder {
if _, ok := w.(*bytes.Buffer); ok {
e := bytesBufferEncoderPool.Get().(*Encoder)
e.s.reset(nil, w, opts...) // buffer taken from bytes.Buffer
return e
} else {
e := streamingEncoderPool.Get().(*Encoder)
e.s.reset(e.s.Buf[:0], w, opts...) // preserve existing buffer
return e
}
}
func putStreamingEncoder(e *Encoder) {
if _, ok := e.s.wr.(*bytes.Buffer); ok {
bytesBufferEncoderPool.Put(e)
} else {
if cap(e.s.Buf) > 64<<10 {
e.s.Buf = nil // avoid pinning arbitrarily large amounts of memory
}
streamingEncoderPool.Put(e)
}
}
var (
// This does not own the internal buffer since it is externally provided.
bufferedDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// This owns the internal buffer, but it is only used to temporarily store
// buffered JSON fetched from the underlying io.Reader.
// In a sufficiently efficient streaming mode, we do not expect the buffer
// to grow arbitrarily large. Thus, we avoid recycling large buffers.
streamingDecoderPool = &sync.Pool{New: func() any { return new(Decoder) }}
// This does not own the internal buffer since
// it is taken directly from the provided bytes.Buffer.
bytesBufferDecoderPool = bufferedDecoderPool
)
func getBufferedDecoder(b []byte, opts ...Options) *Decoder {
d := bufferedDecoderPool.Get().(*Decoder)
d.s.reset(b, nil, opts...)
return d
}
func putBufferedDecoder(d *Decoder) {
bufferedDecoderPool.Put(d)
}
func getStreamingDecoder(r io.Reader, opts ...Options) *Decoder {
if _, ok := r.(*bytes.Buffer); ok {
d := bytesBufferDecoderPool.Get().(*Decoder)
d.s.reset(nil, r, opts...) // buffer taken from bytes.Buffer
return d
} else {
d := streamingDecoderPool.Get().(*Decoder)
d.s.reset(d.s.buf[:0], r, opts...) // preserve existing buffer
return d
}
}
func putStreamingDecoder(d *Decoder) {
if _, ok := d.s.rd.(*bytes.Buffer); ok {
bytesBufferDecoderPool.Put(d)
} else {
if cap(d.s.buf) > 64<<10 {
d.s.buf = nil // avoid pinning arbitrarily large amounts of memory
}
streamingDecoderPool.Put(d)
}
}


@@ -0,0 +1,31 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"github.com/go-json-experiment/json/internal/jsonflags"
"github.com/go-json-experiment/json/internal/jsonwire"
)
var errInvalidUTF8 = &SyntacticError{str: "invalid UTF-8 within string"}
// AppendQuote appends a double-quoted JSON string literal representing src
// to dst and returns the extended buffer.
// It uses the minimal string representation per RFC 8785, section 3.2.2.2.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
return jsonwire.AppendQuote(dst, src, &jsonflags.Flags{})
}
// AppendUnquote appends the decoded interpretation of src as a
// double-quoted JSON string literal to dst and returns the extended buffer.
// The input src must be a JSON string without any surrounding whitespace.
// Invalid UTF-8 bytes are replaced with the Unicode replacement character
// and an error is returned at the end indicating the presence of invalid UTF-8.
// Any trailing bytes after the JSON string literal result in an error.
func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) ([]byte, error) {
return jsonwire.AppendUnquote(dst, src)
}
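
A minimal sketch of the two helpers above; the escape and unescape forms follow the RFC 8785 minimal representation mentioned in the doc comments:

package main

import (
	"fmt"

	"github.com/go-json-experiment/json/jsontext"
)

func main() {
	quoted, err := jsontext.AppendQuote(nil, `hello "world"`)
	fmt.Println(string(quoted), err) // "hello \"world\"" <nil>

	unquoted, err := jsontext.AppendUnquote(nil, `"hello \u00e9"`)
	fmt.Println(string(unquoted), err) // hello é <nil>
}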


@@ -0,0 +1,715 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"math"
"strconv"
"github.com/go-json-experiment/json/internal/jsonwire"
)
var (
errMissingName = &SyntacticError{str: "missing string for object name"}
errMissingColon = &SyntacticError{str: "missing character ':' after object name"}
errMissingValue = &SyntacticError{str: "missing value after object name"}
errMissingComma = &SyntacticError{str: "missing character ',' after object or array value"}
errMismatchDelim = &SyntacticError{str: "mismatching structural token for object or array"}
errMaxDepth = &SyntacticError{str: "exceeded max depth"}
errInvalidNamespace = &SyntacticError{str: "object namespace is in an invalid state"}
)
// Per RFC 8259, section 9, implementations may enforce a maximum depth.
// Such a limit is necessary to prevent stack overflows.
const maxNestingDepth = 10000
type state struct {
// Tokens validates whether the next token kind is valid.
Tokens stateMachine
// Names is a stack of object names.
// Not used if AllowDuplicateNames is true.
Names objectNameStack
// Namespaces is a stack of object namespaces.
// For performance reasons, Encoder or Decoder may not update this
// if Marshal or Unmarshal is able to track names in a more efficient way.
// See makeMapArshaler and makeStructArshaler.
// Not used if AllowDuplicateNames is true.
Namespaces objectNamespaceStack
}
func (s *state) reset() {
s.Tokens.reset()
s.Names.reset()
s.Namespaces.reset()
}
// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.
// The returned pointer is only accurate if s.Names is populated,
// otherwise it uses the numeric index as the object member name.
//
// Invariant: Must call s.Names.copyQuotedBuffer beforehand.
func (s state) appendStackPointer(b []byte) []byte {
var objectDepth int
for i := 1; i < s.Tokens.Depth(); i++ {
e := s.Tokens.index(i)
if e.Length() == 0 {
break // empty object or array
}
b = append(b, '/')
switch {
case e.isObject():
if objectDepth < s.Names.length() {
for _, c := range s.Names.getUnquoted(objectDepth) {
// Per RFC 6901, section 3, escape '~' and '/' characters.
switch c {
case '~':
b = append(b, "~0"...)
case '/':
b = append(b, "~1"...)
default:
b = append(b, c)
}
}
} else {
// Since the names stack is unpopulated, the name is unknown.
// As a best-effort replacement, use the numeric member index.
// While inaccurate, it produces a syntactically valid pointer.
b = strconv.AppendUint(b, uint64((e.Length()-1)/2), 10)
}
objectDepth++
case e.isArray():
b = strconv.AppendUint(b, uint64(e.Length()-1), 10)
}
}
return b
}
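
The RFC 6901 escaping performed in the loop above, restated as a standalone sketch for clarity (escapePointerToken is illustrative only and not part of this package):

package main

import (
	"fmt"
	"strings"
)

// escapePointerToken escapes one JSON Pointer reference token per
// RFC 6901, section 3: '~' becomes "~0", then '/' becomes "~1".
// Replacing '~' first avoids corrupting the inserted "~1" sequences.
func escapePointerToken(s string) string {
	s = strings.ReplaceAll(s, "~", "~0")
	s = strings.ReplaceAll(s, "/", "~1")
	return s
}

func main() {
	fmt.Println("/" + escapePointerToken("a/b~c")) // /a~1b~0c
}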
// stateMachine is a push-down automaton that validates whether
// a sequence of tokens is valid or not according to the JSON grammar.
// It is useful for both encoding and decoding.
//
// It is a stack where each entry represents a nested JSON object or array.
// The stack has a minimum depth of 1 where the first level is a
// virtual JSON array to handle a stream of top-level JSON values.
// The top-level virtual JSON array is special in that it doesn't require commas
// between each JSON value.
//
// For performance, most methods are carefully written to be inlinable.
// The zero value is a valid state machine ready for use.
type stateMachine struct {
Stack []stateEntry
Last stateEntry
}
// reset resets the state machine.
// The machine always starts with a minimum depth of 1.
func (m *stateMachine) reset() {
m.Stack = m.Stack[:0]
if cap(m.Stack) > 1<<10 {
m.Stack = nil
}
m.Last = stateTypeArray
}
// Depth is the current nested depth of JSON objects and arrays.
// It is one-indexed (i.e., top-level values have a depth of 1).
func (m stateMachine) Depth() int {
return len(m.Stack) + 1
}
// index returns a reference to the ith entry.
// It is only valid until the next push method call.
func (m *stateMachine) index(i int) *stateEntry {
if i == len(m.Stack) {
return &m.Last
}
return &m.Stack[i]
}
// DepthLength reports the current nested depth and
// the length of the last JSON object or array.
func (m stateMachine) DepthLength() (int, int) {
return m.Depth(), m.Last.Length()
}
// appendLiteral appends a JSON literal as the next token in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendLiteral() error {
switch {
case m.Last.NeedObjectName():
return errMissingName
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last.Increment()
return nil
}
}
// appendString appends a JSON string as the next token in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendString() error {
switch {
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last.Increment()
return nil
}
}
// appendNumber appends a JSON number as the next token in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) appendNumber() error {
return m.appendLiteral()
}
// pushObject appends a JSON start object token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushObject() error {
switch {
case m.Last.NeedObjectName():
return errMissingName
case !m.Last.isValidNamespace():
return errInvalidNamespace
case len(m.Stack) == maxNestingDepth:
return errMaxDepth
default:
m.Last.Increment()
m.Stack = append(m.Stack, m.Last)
m.Last = stateTypeObject
return nil
}
}
// popObject appends a JSON end object token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popObject() error {
switch {
case !m.Last.isObject():
return errMismatchDelim
case m.Last.needObjectValue():
return errMissingValue
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last = m.Stack[len(m.Stack)-1]
m.Stack = m.Stack[:len(m.Stack)-1]
return nil
}
}
// pushArray appends a JSON start array token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) pushArray() error {
switch {
case m.Last.NeedObjectName():
return errMissingName
case !m.Last.isValidNamespace():
return errInvalidNamespace
case len(m.Stack) == maxNestingDepth:
return errMaxDepth
default:
m.Last.Increment()
m.Stack = append(m.Stack, m.Last)
m.Last = stateTypeArray
return nil
}
}
// popArray appends a JSON end array token as next in the sequence.
// If an error is returned, the state is not mutated.
func (m *stateMachine) popArray() error {
switch {
case !m.Last.isArray() || len(m.Stack) == 0: // forbid popping top-level virtual JSON array
return errMismatchDelim
case !m.Last.isValidNamespace():
return errInvalidNamespace
default:
m.Last = m.Stack[len(m.Stack)-1]
m.Stack = m.Stack[:len(m.Stack)-1]
return nil
}
}
// NeedIndent reports whether indent whitespace should be injected.
// A zero value means that no whitespace should be injected.
// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody
// should be appended to the output immediately before the next token.
func (m stateMachine) NeedIndent(next Kind) (n int) {
willEnd := next == '}' || next == ']'
switch {
case m.Depth() == 1:
return 0 // top-level values are never indented
case m.Last.Length() == 0 && willEnd:
return 0 // an empty object or array is never indented
case m.Last.Length() == 0 || m.Last.needImplicitComma(next):
return m.Depth()
case willEnd:
return m.Depth() - 1
default:
return 0
}
}
// MayAppendDelim appends a colon or comma that may precede the next token.
func (m stateMachine) MayAppendDelim(b []byte, next Kind) []byte {
switch {
case m.Last.needImplicitColon():
return append(b, ':')
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
return append(b, ',')
default:
return b
}
}
// needDelim reports whether a colon or comma token should be implicitly emitted
// before the next token of the specified kind.
// A zero value means no delimiter should be emitted.
func (m stateMachine) needDelim(next Kind) (delim byte) {
switch {
case m.Last.needImplicitColon():
return ':'
case m.Last.needImplicitComma(next) && len(m.Stack) != 0: // comma not needed for top-level values
return ','
default:
return 0
}
}
// checkDelim checks whether the specified delimiter is appropriate
// given the kind of the next token that appears immediately afterwards.
func (m stateMachine) checkDelim(delim byte, next Kind) error {
switch needDelim := m.needDelim(next); {
case needDelim == delim:
return nil
case needDelim == ':':
return errMissingColon
case needDelim == ',':
return errMissingComma
default:
return newInvalidCharacterError([]byte{delim}, "before next token")
}
}
// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.
//
// For efficiency, Marshal and Unmarshal may disable namespaces since there are
// more efficient ways to track duplicate names. However, if an error occurs,
// the namespaces in Encoder or Decoder will be left in an inconsistent state.
// Mark the namespaces as invalid so that future method calls on
// Encoder or Decoder will return an error.
func (m *stateMachine) InvalidateDisabledNamespaces() {
for i := 0; i < m.Depth(); i++ {
e := m.index(i)
if !e.isActiveNamespace() {
e.invalidateNamespace()
}
}
}
// stateEntry encodes several artifacts within a single unsigned integer:
// - whether this represents a JSON object or array,
// - whether this object should check for duplicate names, and
// - how many elements are in this JSON object or array.
type stateEntry uint64
const (
// The type mask (1 bit) records whether this is a JSON object or array.
stateTypeMask stateEntry = 0x8000_0000_0000_0000
stateTypeObject stateEntry = 0x8000_0000_0000_0000
stateTypeArray stateEntry = 0x0000_0000_0000_0000
// The name check mask (2 bits) records whether to update
// the namespaces for the current JSON object and
// whether the namespace is valid.
stateNamespaceMask stateEntry = 0x6000_0000_0000_0000
stateDisableNamespace stateEntry = 0x4000_0000_0000_0000
stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000
// The count mask (61 bits) records the number of elements.
stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff
stateCountLSBMask stateEntry = 0x0000_0000_0000_0001
stateCountOdd stateEntry = 0x0000_0000_0000_0001
stateCountEven stateEntry = 0x0000_0000_0000_0000
)
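// As an editorial illustration (not part of the original source), an entry
// for a JSON object with namespace updates disabled and three elements
// recorded so far decodes as follows:
//
//	e := stateTypeObject | stateDisableNamespace | stateEntry(3)
//	_ = e.isObject()          // true
//	_ = e.isActiveNamespace() // false: namespace updates are disabled
//	_ = e.Length()            // 3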
// Length reports the number of elements in the JSON object or array.
// Each name and value in an object entry is treated as a separate element.
func (e stateEntry) Length() int {
return int(e & stateCountMask)
}
// isObject reports whether this is a JSON object.
func (e stateEntry) isObject() bool {
return e&stateTypeMask == stateTypeObject
}
// isArray reports whether this is a JSON array.
func (e stateEntry) isArray() bool {
return e&stateTypeMask == stateTypeArray
}
// NeedObjectName reports whether the next token must be a JSON string,
// which is necessary for JSON object names.
func (e stateEntry) NeedObjectName() bool {
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven
}
// needImplicitColon reports whether a colon should occur next,
// which always occurs after JSON object names.
func (e stateEntry) needImplicitColon() bool {
return e.needObjectValue()
}
// needObjectValue reports whether the next token must be a JSON value,
// which is necessary after every JSON object name.
func (e stateEntry) needObjectValue() bool {
return e&(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd
}
// needImplicitComma reports whether a comma should occur next,
// which always occurs after a value in a JSON object or array
// before the next value (or name).
func (e stateEntry) needImplicitComma(next Kind) bool {
return !e.needObjectValue() && e.Length() > 0 && next != '}' && next != ']'
}
// Increment increments the number of elements for the current object or array.
// This assumes that overflow won't practically be an issue since
// 1<<bits.OnesCount(stateCountMask) is sufficiently large.
func (e *stateEntry) Increment() {
(*e)++
}
// decrement decrements the number of elements for the current object or array.
// It is the caller's responsibility to ensure that e.Length() > 0.
func (e *stateEntry) decrement() {
(*e)--
}
// DisableNamespace disables the JSON object namespace such that the
// Encoder or Decoder no longer updates the namespace.
func (e *stateEntry) DisableNamespace() {
*e |= stateDisableNamespace
}
// isActiveNamespace reports whether the JSON object namespace is actively
// being updated and used for duplicate name checks.
func (e stateEntry) isActiveNamespace() bool {
return e&(stateDisableNamespace) == 0
}
// invalidateNamespace marks the JSON object namespace as being invalid.
func (e *stateEntry) invalidateNamespace() {
*e |= stateInvalidNamespace
}
// isValidNamespace reports whether the JSON object namespace is valid.
func (e stateEntry) isValidNamespace() bool {
return e&(stateInvalidNamespace) == 0
}
// objectNameStack is a stack of names when descending into a JSON object.
// In contrast to objectNamespaceStack, this only has to remember a single name
// per JSON object.
//
// This data structure may contain offsets to encodeBuffer or decodeBuffer.
// It violates clean abstraction of layers, but is significantly more efficient.
// This ensures that popping and pushing in the common case is a trivial
// push/pop of an offset integer.
//
// The zero value is an empty names stack ready for use.
type objectNameStack struct {
// offsets is a stack of offsets for each name.
// A non-negative offset is the ending offset into the local names buffer.
// A negative offset is the bit-wise inverse of a starting offset into
// a remote buffer (e.g., encodeBuffer or decodeBuffer).
// A math.MinInt offset at the end implies that the last object is empty.
// Invariant: Positive offsets always occur before negative offsets.
offsets []int
// unquotedNames is a back-to-back concatenation of names.
unquotedNames []byte
}
func (ns *objectNameStack) reset() {
ns.offsets = ns.offsets[:0]
ns.unquotedNames = ns.unquotedNames[:0]
if cap(ns.offsets) > 1<<6 {
ns.offsets = nil // avoid pinning arbitrarily large amounts of memory
}
if cap(ns.unquotedNames) > 1<<10 {
ns.unquotedNames = nil // avoid pinning arbitrarily large amounts of memory
}
}
func (ns *objectNameStack) length() int {
return len(ns.offsets)
}
// getUnquoted retrieves the ith unquoted name in the namespace.
// It returns an empty string if the last object is empty.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) getUnquoted(i int) []byte {
ns.ensureCopiedBuffer()
if i == 0 {
return ns.unquotedNames[:ns.offsets[0]]
} else {
return ns.unquotedNames[ns.offsets[i-1]:ns.offsets[i-0]]
}
}
// invalidOffset indicates that the last JSON object currently has no name.
const invalidOffset = math.MinInt
// push descends into a nested JSON object.
func (ns *objectNameStack) push() {
ns.offsets = append(ns.offsets, invalidOffset)
}
// ReplaceLastQuotedOffset replaces the last name with the starting offset
// to the quoted name in some remote buffer. All offsets provided must be
// relative to the same buffer until copyQuotedBuffer is called.
func (ns *objectNameStack) ReplaceLastQuotedOffset(i int) {
// Use bit-wise inversion instead of naive multiplication by -1 to avoid
// ambiguity regarding zero (which is a valid offset into the names field).
// Bit-wise inversion is mathematically equivalent to -i-1,
// such that 0 becomes -1, 1 becomes -2, and so forth.
// This ensures that remote offsets are always negative.
ns.offsets[len(ns.offsets)-1] = ^i
}
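// As an editorial note (not part of the original source), the inversion is
// self-inverse, so a reader recovers a remote offset with another ^:
//
//	stored := ^5        // -6, unambiguously remote since it is negative
//	original := ^stored // 5 again, since ^^i == i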
// replaceLastUnquotedName replaces the last name with the provided name.
//
// Invariant: Must call copyQuotedBuffer beforehand.
func (ns *objectNameStack) replaceLastUnquotedName(s string) {
ns.ensureCopiedBuffer()
var startOffset int
if len(ns.offsets) > 1 {
startOffset = ns.offsets[len(ns.offsets)-2]
}
ns.unquotedNames = append(ns.unquotedNames[:startOffset], s...)
ns.offsets[len(ns.offsets)-1] = len(ns.unquotedNames)
}
// clearLast removes any name in the last JSON object.
// It is semantically equivalent to ns.push followed by ns.pop.
func (ns *objectNameStack) clearLast() {
ns.offsets[len(ns.offsets)-1] = invalidOffset
}
// pop ascends out of a nested JSON object.
func (ns *objectNameStack) pop() {
ns.offsets = ns.offsets[:len(ns.offsets)-1]
}
// copyQuotedBuffer copies names from the remote buffer into the local names
// buffer so that there are no more offset references into the remote buffer.
// This allows the remote buffer to change contents without affecting
// the names that this data structure is trying to remember.
func (ns *objectNameStack) copyQuotedBuffer(b []byte) {
// Find the first negative offset.
var i int
for i = len(ns.offsets) - 1; i >= 0 && ns.offsets[i] < 0; i-- {
continue
}
// Copy each name from the remote buffer into the local buffer.
for i = i + 1; i < len(ns.offsets); i++ {
if i == len(ns.offsets)-1 && ns.offsets[i] == invalidOffset {
if i == 0 {
ns.offsets[i] = 0
} else {
ns.offsets[i] = ns.offsets[i-1]
}
break // last JSON object had a push without any names
}
// As a form of Hyrum proofing, we write an invalid character into the
// buffer to make misuse of Decoder.ReadToken more obvious.
// We need to undo that mutation here.
quotedName := b[^ns.offsets[i]:]
if quotedName[0] == invalidateBufferByte {
quotedName[0] = '"'
}
// Append the unquoted name to the local buffer.
var startOffset int
if i > 0 {
startOffset = ns.offsets[i-1]
}
if n := jsonwire.ConsumeSimpleString(quotedName); n > 0 {
ns.unquotedNames = append(ns.unquotedNames[:startOffset], quotedName[len(`"`):n-len(`"`)]...)
} else {
ns.unquotedNames, _ = jsonwire.AppendUnquote(ns.unquotedNames[:startOffset], quotedName)
}
ns.offsets[i] = len(ns.unquotedNames)
}
}
func (ns *objectNameStack) ensureCopiedBuffer() {
if len(ns.offsets) > 0 && ns.offsets[len(ns.offsets)-1] < 0 {
panic("BUG: copyQuotedBuffer not called beforehand")
}
}
// objectNamespaceStack is a stack of object namespaces.
// This data structure assists in detecting duplicate names.
type objectNamespaceStack []objectNamespace
// reset resets the object namespace stack.
func (nss *objectNamespaceStack) reset() {
if cap(*nss) > 1<<10 {
*nss = nil
}
*nss = (*nss)[:0]
}
// push starts a new namespace for a nested JSON object.
func (nss *objectNamespaceStack) push() {
if cap(*nss) > len(*nss) {
*nss = (*nss)[:len(*nss)+1]
nss.Last().reset()
} else {
*nss = append(*nss, objectNamespace{})
}
}
// Last returns a pointer to the last JSON object namespace.
func (nss objectNamespaceStack) Last() *objectNamespace {
return &nss[len(nss)-1]
}
// pop terminates the namespace for a nested JSON object.
func (nss *objectNamespaceStack) pop() {
*nss = (*nss)[:len(*nss)-1]
}
// objectNamespace is the namespace for a JSON object.
// In contrast to objectNameStack, this needs to remember all names
// per JSON object.
//
// The zero value is an empty namespace ready for use.
type objectNamespace struct {
// It relies on a linear search over all the names before switching
// to use a Go map for direct lookup.
// endOffsets is a list of offsets to the end of each name in buffers.
// The length of offsets is the number of names in the namespace.
endOffsets []uint
// allUnquotedNames is a back-to-back concatenation of every name in the namespace.
allUnquotedNames []byte
// mapNames is a Go map containing every name in the namespace.
// Only valid if non-nil.
mapNames map[string]struct{}
}
// reset resets the namespace to be empty.
func (ns *objectNamespace) reset() {
ns.endOffsets = ns.endOffsets[:0]
ns.allUnquotedNames = ns.allUnquotedNames[:0]
ns.mapNames = nil
if cap(ns.endOffsets) > 1<<6 {
ns.endOffsets = nil // avoid pinning arbitrarily large amounts of memory
}
if cap(ns.allUnquotedNames) > 1<<10 {
ns.allUnquotedNames = nil // avoid pinning arbitrarily large amounts of memory
}
}
// length reports the number of names in the namespace.
func (ns *objectNamespace) length() int {
return len(ns.endOffsets)
}
// getUnquoted retrieves the ith unquoted name in the namespace.
func (ns *objectNamespace) getUnquoted(i int) []byte {
if i == 0 {
return ns.allUnquotedNames[:ns.endOffsets[0]]
} else {
return ns.allUnquotedNames[ns.endOffsets[i-1]:ns.endOffsets[i-0]]
}
}
// lastUnquoted retrieves the last name in the namespace.
func (ns *objectNamespace) lastUnquoted() []byte {
return ns.getUnquoted(ns.length() - 1)
}
// insertQuoted inserts a name and reports whether it was inserted,
// which only occurs if name is not already in the namespace.
// The provided name must be a valid JSON string.
func (ns *objectNamespace) insertQuoted(name []byte, isVerbatim bool) bool {
if isVerbatim {
name = name[len(`"`) : len(name)-len(`"`)]
}
return ns.insert(name, !isVerbatim)
}
func (ns *objectNamespace) InsertUnquoted(name []byte) bool {
return ns.insert(name, false)
}
func (ns *objectNamespace) insert(name []byte, quoted bool) bool {
var allNames []byte
if quoted {
allNames, _ = jsonwire.AppendUnquote(ns.allUnquotedNames, name)
} else {
allNames = append(ns.allUnquotedNames, name...)
}
name = allNames[len(ns.allUnquotedNames):]
// Switch to a map if the buffer is too large for linear search.
// This does not add the current name to the map.
if ns.mapNames == nil && (ns.length() > 64 || len(ns.allUnquotedNames) > 1024) {
ns.mapNames = make(map[string]struct{})
var startOffset uint
for _, endOffset := range ns.endOffsets {
name := ns.allUnquotedNames[startOffset:endOffset]
ns.mapNames[string(name)] = struct{}{} // allocates a new string
startOffset = endOffset
}
}
if ns.mapNames == nil {
// Perform linear search over the buffer to find matching names.
// It provides O(n) lookup, but does not require any allocations.
var startOffset uint
for _, endOffset := range ns.endOffsets {
if string(ns.allUnquotedNames[startOffset:endOffset]) == string(name) {
return false
}
startOffset = endOffset
}
} else {
// Use the map if it is populated.
// It provides O(1) lookup, but requires a string allocation per name.
if _, ok := ns.mapNames[string(name)]; ok {
return false
}
ns.mapNames[string(name)] = struct{}{} // allocates a new string
}
ns.allUnquotedNames = allNames
ns.endOffsets = append(ns.endOffsets, uint(len(ns.allUnquotedNames)))
return true
}
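// A minimal in-package sketch (not part of the original source) of the
// duplicate-detection behavior described above:
//
//	var ns objectNamespace
//	_ = ns.InsertUnquoted([]byte("a")) // true: "a" is new
//	_ = ns.InsertUnquoted([]byte("b")) // true: "b" is new
//	_ = ns.InsertUnquoted([]byte("a")) // false: "a" is a duplicate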
// removeLast removes the last name in the namespace.
func (ns *objectNamespace) removeLast() {
if ns.mapNames != nil {
delete(ns.mapNames, string(ns.lastUnquoted()))
}
if ns.length()-1 == 0 {
ns.endOffsets = ns.endOffsets[:0]
ns.allUnquotedNames = ns.allUnquotedNames[:0]
} else {
ns.endOffsets = ns.endOffsets[:ns.length()-1]
ns.allUnquotedNames = ns.allUnquotedNames[:ns.endOffsets[ns.length()-1]]
}
}


@@ -0,0 +1,525 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"math"
"strconv"
"github.com/go-json-experiment/json/internal/jsonflags"
"github.com/go-json-experiment/json/internal/jsonwire"
)
// NOTE: Token is analogous to v1 json.Token.
const (
maxInt64 = math.MaxInt64
minInt64 = math.MinInt64
maxUint64 = math.MaxUint64
minUint64 = 0 // for consistency and readability purposes
invalidTokenPanic = "invalid json.Token; it has been voided by a subsequent json.Decoder call"
)
// Token represents a lexical JSON token, which may be one of the following:
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - a start or end delimiter for a JSON object (i.e., { or } )
// - a start or end delimiter for a JSON array (i.e., [ or ] )
//
// A Token cannot represent entire array or object values, while a [Value] can.
// There is no Token to represent commas and colons since
// these structural tokens can be inferred from the surrounding context.
type Token struct {
nonComparable
// Tokens can exist in either a "raw" or an "exact" form.
// Tokens produced by the Decoder are in the "raw" form.
// Tokens returned by constructors are usually in the "exact" form.
// The Encoder accepts Tokens in either the "raw" or "exact" form.
//
// The following chart shows the possible values for each Token type:
// ╔═════════════════╦════════════╤════════════╤════════════╗
// ║ Token type ║ raw field │ str field │ num field ║
// ╠═════════════════╬════════════╪════════════╪════════════╣
// ║ null (raw) ║ "null" │ "" │ 0 ║
// ║ false (raw) ║ "false" │ "" │ 0 ║
// ║ true (raw) ║ "true" │ "" │ 0 ║
// ║ string (raw) ║ non-empty │ "" │ offset ║
// ║ string (string) ║ nil │ non-empty │ 0 ║
// ║ number (raw) ║ non-empty │ "" │ offset ║
// ║ number (float) ║ nil │ "f" │ non-zero ║
// ║ number (int64) ║ nil │ "i" │ non-zero ║
// ║ number (uint64) ║ nil │ "u" │ non-zero ║
// ║ object (delim) ║ "{" or "}" │ "" │ 0 ║
// ║ array (delim) ║ "[" or "]" │ "" │ 0 ║
// ╚═════════════════╩════════════╧════════════╧════════════╝
//
// Notes:
// - For tokens stored in "raw" form, the num field contains the
// absolute offset determined by raw.previousOffsetStart().
// The buffer itself is stored in raw.previousBuffer().
// - JSON literals and structural characters are always in the "raw" form.
// - JSON strings and numbers can be in either "raw" or "exact" forms.
// - The zero value of JSON strings and numbers in the "exact" form
//   has an ambiguous representation, so such values are always represented
//   in the "raw" form.
// raw contains a reference to the raw decode buffer.
// If non-nil, then its value takes precedence over str and num.
// It is only valid if num == raw.previousOffsetStart().
raw *decodeBuffer
// str is the unescaped JSON string if num is zero.
// Otherwise, it is "f", "i", or "u" if num should be interpreted
// as a float64, int64, or uint64, respectively.
str string
// num is a float64, int64, or uint64 stored as a uint64 value.
// It is non-zero for any JSON number in the "exact" form.
num uint64
}
// TODO: Does representing 1-byte delimiters as *decodeBuffer cause performance issues?
var (
Null Token = rawToken("null")
False Token = rawToken("false")
True Token = rawToken("true")
ObjectStart Token = rawToken("{")
ObjectEnd Token = rawToken("}")
ArrayStart Token = rawToken("[")
ArrayEnd Token = rawToken("]")
zeroString Token = rawToken(`""`)
zeroNumber Token = rawToken(`0`)
nanString Token = String("NaN")
pinfString Token = String("Infinity")
ninfString Token = String("-Infinity")
)
func rawToken(s string) Token {
return Token{raw: &decodeBuffer{buf: []byte(s), prevStart: 0, prevEnd: len(s)}}
}
// Bool constructs a Token representing a JSON boolean.
func Bool(b bool) Token {
if b {
return True
}
return False
}
// String constructs a Token representing a JSON string.
// The provided string should contain valid UTF-8; otherwise, invalid characters
// may be mangled as the Unicode replacement character.
func String(s string) Token {
if len(s) == 0 {
return zeroString
}
return Token{str: s}
}
// Float constructs a Token representing a JSON number.
// The values NaN, +Inf, and -Inf will be represented
// as a JSON string with the values "NaN", "Infinity", and "-Infinity".
func Float(n float64) Token {
switch {
case math.Float64bits(n) == 0:
return zeroNumber
case math.IsNaN(n):
return nanString
case math.IsInf(n, +1):
return pinfString
case math.IsInf(n, -1):
return ninfString
}
return Token{str: "f", num: math.Float64bits(n)}
}
// Int constructs a Token representing a JSON number from an int64.
func Int(n int64) Token {
if n == 0 {
return zeroNumber
}
return Token{str: "i", num: uint64(n)}
}
// Uint constructs a Token representing a JSON number from a uint64.
func Uint(n uint64) Token {
if n == 0 {
return zeroNumber
}
return Token{str: "u", num: uint64(n)}
}
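// A hedged usage sketch (not part of the original source): callers typically
// build exact-form tokens with the constructors above and read them back
// with the accessors defined later in this file:
//
//	t := jsontext.String("hello, world!")
//	_ = t.Kind()   // '"'
//	_ = t.String() // "hello, world!"
//
//	n := jsontext.Int(-42)
//	_ = n.Kind() // '0'
//	_ = n.Int()  // -42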
// Clone makes a copy of the Token such that its value remains valid
// even after a subsequent [Decoder.Read] call.
func (t Token) Clone() Token {
// TODO: Allow caller to avoid any allocations?
if raw := t.raw; raw != nil {
// Avoid copying globals.
if t.raw.prevStart == 0 {
switch t.raw {
case Null.raw:
return Null
case False.raw:
return False
case True.raw:
return True
case ObjectStart.raw:
return ObjectStart
case ObjectEnd.raw:
return ObjectEnd
case ArrayStart.raw:
return ArrayStart
case ArrayEnd.raw:
return ArrayEnd
}
}
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
// TODO(https://go.dev/issue/45038): Use bytes.Clone.
buf := append([]byte(nil), raw.PreviousBuffer()...)
return Token{raw: &decodeBuffer{buf: buf, prevStart: 0, prevEnd: len(buf)}}
}
return t
}
// Bool returns the value for a JSON boolean.
// It panics if the token kind is not a JSON boolean.
func (t Token) Bool() bool {
switch t.raw {
case True.raw:
return true
case False.raw:
return false
default:
panic("invalid JSON token kind: " + t.Kind().String())
}
}
// appendString appends a JSON string to dst and returns it.
// It panics if t is not a JSON string.
func (t Token) appendString(dst []byte, flags *jsonflags.Flags) ([]byte, error) {
if raw := t.raw; raw != nil {
// Handle raw string value.
buf := raw.PreviousBuffer()
if Kind(buf[0]) == '"' {
if jsonwire.ConsumeSimpleString(buf) == len(buf) {
return append(dst, buf...), nil
}
dst, _, err := jsonwire.ReformatString(dst, buf, flags)
return dst, err
}
} else if len(t.str) != 0 && t.num == 0 {
// Handle exact string value.
return jsonwire.AppendQuote(dst, t.str, flags)
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// String returns the unescaped string value for a JSON string.
// For other JSON kinds, this returns the raw JSON representation.
func (t Token) String() string {
// This is inlinable to take advantage of "function outlining".
// This avoids an allocation for the string(b) conversion
// if the caller does not use the string in an escaping manner.
// See https://blog.filippo.io/efficient-go-apis-with-the-inliner/
s, b := t.string()
if len(b) > 0 {
return string(b)
}
return s
}
func (t Token) string() (string, []byte) {
if raw := t.raw; raw != nil {
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
buf := raw.PreviousBuffer()
if buf[0] == '"' {
// TODO: Preserve ValueFlags in Token?
isVerbatim := jsonwire.ConsumeSimpleString(buf) == len(buf)
return "", jsonwire.UnquoteMayCopy(buf, isVerbatim)
}
// Handle tokens that are not JSON strings for fmt.Stringer.
return "", buf
}
if len(t.str) != 0 && t.num == 0 {
return t.str, nil
}
// Handle tokens that are not JSON strings for fmt.Stringer.
if t.num > 0 {
switch t.str[0] {
case 'f':
return string(jsonwire.AppendFloat(nil, math.Float64frombits(t.num), 64)), nil
case 'i':
return strconv.FormatInt(int64(t.num), 10), nil
case 'u':
return strconv.FormatUint(uint64(t.num), 10), nil
}
}
return "<invalid json.Token>", nil
}
// appendNumber appends a JSON number to dst and returns it.
// It panics if t is not a JSON number.
func (t Token) appendNumber(dst []byte, canonicalize bool) ([]byte, error) {
if raw := t.raw; raw != nil {
// Handle raw number value.
buf := raw.PreviousBuffer()
if Kind(buf[0]).normalize() == '0' {
if !canonicalize {
return append(dst, buf...), nil
}
dst, _, err := jsonwire.ReformatNumber(dst, buf, canonicalize)
return dst, err
}
} else if t.num != 0 {
// Handle exact number value.
switch t.str[0] {
case 'f':
return jsonwire.AppendFloat(dst, math.Float64frombits(t.num), 64), nil
case 'i':
return strconv.AppendInt(dst, int64(t.num), 10), nil
case 'u':
return strconv.AppendUint(dst, uint64(t.num), 10), nil
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Float returns the floating-point value for a JSON number.
// It returns a NaN, +Inf, or -Inf value for any JSON string
// with the values "NaN", "Infinity", or "-Infinity".
// It panics for all other cases.
func (t Token) Float() float64 {
if raw := t.raw; raw != nil {
// Handle raw number value.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
buf := raw.PreviousBuffer()
if Kind(buf[0]).normalize() == '0' {
fv, _ := jsonwire.ParseFloat(buf, 64)
return fv
}
} else if t.num != 0 {
// Handle exact number value.
switch t.str[0] {
case 'f':
return math.Float64frombits(t.num)
case 'i':
return float64(int64(t.num))
case 'u':
return float64(uint64(t.num))
}
}
// Handle string values with "NaN", "Infinity", or "-Infinity".
if t.Kind() == '"' {
switch t.String() {
case "NaN":
return math.NaN()
case "Infinity":
return math.Inf(+1)
case "-Infinity":
return math.Inf(-1)
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Int returns the signed integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of an int64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
func (t Token) Int() int64 {
if raw := t.raw; raw != nil {
// Handle raw integer value.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
neg := false
buf := raw.PreviousBuffer()
if len(buf) > 0 && buf[0] == '-' {
neg, buf = true, buf[1:]
}
if numAbs, ok := jsonwire.ParseUint(buf); ok {
if neg {
if numAbs > -minInt64 {
return minInt64
}
return -1 * int64(numAbs)
} else {
if numAbs > +maxInt64 {
return maxInt64
}
return +1 * int64(numAbs)
}
}
} else if t.num != 0 {
// Handle exact integer value.
switch t.str[0] {
case 'i':
return int64(t.num)
case 'u':
if t.num > maxInt64 {
return maxInt64
}
return int64(t.num)
}
}
// Handle JSON number that is a floating-point value.
if t.Kind() == '0' {
switch fv := t.Float(); {
case fv >= maxInt64:
return maxInt64
case fv <= minInt64:
return minInt64
default:
return int64(fv) // truncation toward zero
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
// Uint returns the unsigned integer value for a JSON number.
// The fractional component of any number is ignored (truncation toward zero).
// Any number beyond the representation of a uint64 will be saturated
// to the closest representable value.
// It panics if the token kind is not a JSON number.
func (t Token) Uint() uint64 {
// NOTE: This accessor returns 0 for any negative JSON number,
// which might be surprising, but is at least consistent with the behavior
// of saturating out-of-bounds numbers to the closest representable number.
if raw := t.raw; raw != nil {
// Handle raw integer value.
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
neg := false
buf := raw.PreviousBuffer()
if len(buf) > 0 && buf[0] == '-' {
neg, buf = true, buf[1:]
}
if num, ok := jsonwire.ParseUint(buf); ok {
if neg {
return minUint64
}
return num
}
} else if t.num != 0 {
// Handle exact integer value.
switch t.str[0] {
case 'u':
return t.num
case 'i':
if int64(t.num) < minUint64 {
return minUint64
}
return uint64(int64(t.num))
}
}
// Handle JSON number that is a floating-point value.
if t.Kind() == '0' {
switch fv := t.Float(); {
case fv >= maxUint64:
return maxUint64
case fv <= minUint64:
return minUint64
default:
return uint64(fv) // truncation toward zero
}
}
panic("invalid JSON token kind: " + t.Kind().String())
}
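// An editorial sketch (not part of the original source) of the saturation
// behavior documented above:
//
//	_ = jsontext.Uint(math.MaxUint64).Int() // math.MaxInt64 (saturated)
//	_ = jsontext.Int(-1).Uint()             // 0 (negative values saturate to zero)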
// Kind returns the token kind.
func (t Token) Kind() Kind {
switch {
case t.raw != nil:
raw := t.raw
if uint64(raw.previousOffsetStart()) != t.num {
panic(invalidTokenPanic)
}
return Kind(t.raw.buf[raw.prevStart]).normalize()
case t.num != 0:
return '0'
case len(t.str) != 0:
return '"'
default:
return invalidKind
}
}
// Kind represents each possible JSON token kind with a single byte,
// which is conveniently the first byte of that kind's grammar
// with the restriction that numbers always be represented with '0':
//
// - 'n': null
// - 'f': false
// - 't': true
// - '"': string
// - '0': number
// - '{': object start
// - '}': object end
// - '[': array start
// - ']': array end
//
// An invalid kind is usually represented using 0,
// but may be non-zero due to invalid JSON data.
type Kind byte
const invalidKind Kind = 0
// String prints the kind in a human-readable fashion.
func (k Kind) String() string {
switch k {
case 'n':
return "null"
case 'f':
return "false"
case 't':
return "true"
case '"':
return "string"
case '0':
return "number"
case '{':
return "{"
case '}':
return "}"
case '[':
return "["
case ']':
return "]"
default:
return "<invalid json.Kind: " + jsonwire.QuoteRune(string(k)) + ">"
}
}
// normalize coalesces all possible starting characters of a number as just '0'.
func (k Kind) normalize() Kind {
if k == '-' || ('0' <= k && k <= '9') {
return '0'
}
return k
}
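// For example (an editorial illustration, not part of the original source),
// every possible leading byte of a JSON number normalizes to '0':
//
//	_ = Kind('-').normalize() // '0'
//	_ = Kind('7').normalize() // '0'
//	_ = Kind('"').normalize() // '"' (unchanged)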


@@ -0,0 +1,305 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package jsontext
import (
"bytes"
"errors"
"io"
"slices"
"strings"
"sync"
"github.com/go-json-experiment/json/internal/jsonflags"
"github.com/go-json-experiment/json/internal/jsonwire"
)
// NOTE: Value is analogous to v1 json.RawMessage.
// Value represents a single raw JSON value, which may be one of the following:
// - a JSON literal (i.e., null, true, or false)
// - a JSON string (e.g., "hello, world!")
// - a JSON number (e.g., 123.456)
// - an entire JSON object (e.g., {"fizz":"buzz"} )
// - an entire JSON array (e.g., [1,2,3] )
//
// Value can represent entire array or object values, while [Token] cannot.
// Value may contain leading and/or trailing whitespace.
type Value []byte
// Clone returns a copy of v.
func (v Value) Clone() Value {
return bytes.Clone(v)
}
// String returns the string formatting of v.
func (v Value) String() string {
if v == nil {
return "null"
}
return string(v)
}
// IsValid reports whether the raw JSON value is syntactically valid
// according to RFC 7493.
//
// It verifies whether the input is properly encoded as UTF-8,
// that escape sequences within strings decode to valid Unicode codepoints, and
// that all names in each object are unique.
// It does not verify whether numbers are representable within the limits
// of any common numeric type (e.g., float64, int64, or uint64).
func (v Value) IsValid() bool {
d := getBufferedDecoder(v)
defer putBufferedDecoder(d)
_, errVal := d.ReadValue()
_, errEOF := d.ReadToken()
return errVal == nil && errEOF == io.EOF
}
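// A hedged usage sketch (not part of the original source):
//
//	v := jsontext.Value(`{"fizz":"buzz"}`)
//	_ = v.Kind()    // '{'
//	_ = v.IsValid() // true
//
//	dup := jsontext.Value(`{"a":1,"a":2}`)
//	_ = dup.IsValid() // false: RFC 7493 requires unique object names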
// Compact removes all whitespace from the raw JSON value.
//
// It does not reformat JSON strings to use any other representation.
// It is guaranteed to succeed if the input is valid.
// If the value is already compacted, then the buffer is not mutated.
func (v *Value) Compact() error {
return v.reformat(false, false, "", "")
}
// Indent reformats the whitespace in the raw JSON value so that each element
// in a JSON object or array begins on a new, indented line beginning with
// prefix followed by one or more copies of indent according to the nesting.
// The value does not begin with the prefix or any indentation,
// to make it easier to embed inside other formatted JSON data.
//
// It does not reformat JSON strings to use any other representation.
// It is guaranteed to succeed if the input is valid.
// If the value is already indented properly, then the buffer is not mutated.
//
// The prefix and indent strings must be composed of only spaces and/or tabs.
func (v *Value) Indent(prefix, indent string) error {
return v.reformat(false, true, prefix, indent)
}
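// A hedged usage sketch (not part of the original source):
//
//	v := jsontext.Value("{\n\t\"fizz\" : \"buzz\"\n}")
//	_ = v.Compact()        // v is now `{"fizz":"buzz"}`
//	_ = v.Indent("", "\t") // members now begin on new, tab-indented lines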
// Canonicalize canonicalizes the raw JSON value according to the
// JSON Canonicalization Scheme (JCS) as defined by RFC 8785
// where it produces a stable representation of a JSON value.
//
// The output stability is dependent on the stability of the application data
// (see RFC 8785, Appendix E). It cannot produce stable output from
// fundamentally unstable input. For example, if the JSON value
// contains ephemeral data (e.g., a frequently changing timestamp),
// then the value is still unstable regardless of whether this is called.
//
// Note that JCS treats all JSON numbers as IEEE 754 double precision numbers.
// Any numbers with precision beyond what is representable by that form
// will lose their precision when canonicalized. For example, integer values
// beyond ±2⁵³ will lose their precision. It is recommended that
// int64 and uint64 data types be represented as a JSON string.
//
// It is guaranteed to succeed if the input is valid.
// If the value is already canonicalized, then the buffer is not mutated.
func (v *Value) Canonicalize() error {
return v.reformat(true, false, "", "")
}
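// A hedged usage sketch (not part of the original source); the expected
// output follows RFC 8785 member ordering and number formatting:
//
//	v := jsontext.Value(`{"b": 2.0, "a": 1}`)
//	_ = v.Canonicalize() // v is now `{"a":1,"b":2}`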
// TODO: Instead of implementing the v1 Marshaler/Unmarshaler,
// consider implementing the v2 versions instead.
// MarshalJSON returns v as the JSON encoding of v.
// It returns the stored value as the raw JSON output without any validation.
// If v is nil, then this returns a JSON null.
func (v Value) MarshalJSON() ([]byte, error) {
// NOTE: This matches the behavior of v1 json.RawMessage.MarshalJSON.
if v == nil {
return []byte("null"), nil
}
return v, nil
}
// UnmarshalJSON sets v as the JSON encoding of b.
// It stores a copy of the provided raw JSON input without any validation.
func (v *Value) UnmarshalJSON(b []byte) error {
// NOTE: This matches the behavior of v1 json.RawMessage.UnmarshalJSON.
if v == nil {
return errors.New("json.Value: UnmarshalJSON on nil pointer")
}
*v = append((*v)[:0], b...)
return nil
}
// Kind returns the starting token kind.
// For a valid value, this will never include '}' or ']'.
func (v Value) Kind() Kind {
if v := v[jsonwire.ConsumeWhitespace(v):]; len(v) > 0 {
return Kind(v[0]).normalize()
}
return invalidKind
}
func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error {
// Write the entire value to reformat all tokens and whitespace.
e := getBufferedEncoder()
defer putBufferedEncoder(e)
eo := &e.s.Struct
if canonical {
eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 0) // per RFC 8785, section 3.2.4
eo.Flags.Set(jsonflags.AllowDuplicateNames | 0) // per RFC 8785, section 3.1
eo.Flags.Set(jsonflags.CanonicalizeNumbers | 1) // per RFC 8785, section 3.2.2.3
eo.Flags.Set(jsonflags.PreserveRawStrings | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.EscapeForHTML | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.EscapeForJS | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.Expand | 0) // per RFC 8785, section 3.2.1
} else {
if s := strings.TrimLeft(prefix, " \t"); len(s) > 0 {
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent prefix")
}
if s := strings.TrimLeft(indent, " \t"); len(s) > 0 {
panic("json: invalid character " + jsonwire.QuoteRune(s) + " in indent")
}
eo.Flags.Set(jsonflags.AllowInvalidUTF8 | 1)
eo.Flags.Set(jsonflags.AllowDuplicateNames | 1)
eo.Flags.Set(jsonflags.PreserveRawStrings | 1)
if multiline {
eo.Flags.Set(jsonflags.Expand | 1)
eo.Flags.Set(jsonflags.Indent | 1)
eo.Flags.Set(jsonflags.IndentPrefix | 1)
eo.IndentPrefix = prefix
eo.Indent = indent
} else {
eo.Flags.Set(jsonflags.Expand | 0)
}
}
eo.Flags.Set(jsonflags.OmitTopLevelNewline | 1)
if err := e.s.WriteValue(*v); err != nil {
return err
}
// For canonical output, we may need to reorder object members.
if canonical {
// Obtain a buffered encoder just to use its internal buffer as
// a scratch buffer in reorderObjects for reordering object members.
e2 := getBufferedEncoder()
defer putBufferedEncoder(e2)
// Disable redundant checks performed earlier during encoding.
d := getBufferedDecoder(e.s.Buf)
defer putBufferedDecoder(d)
d.s.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1)
reorderObjects(d, &e2.s.Buf) // per RFC 8785, section 3.2.3
}
// Store the result back into the value if different.
if !bytes.Equal(*v, e.s.Buf) {
*v = append((*v)[:0], e.s.Buf...)
}
return nil
}
type memberName struct {
// name is the unescaped name.
name []byte
// before and after are byte offsets into Decoder.buf that delimit
// the entire name/value pair. The span may include a leading comma.
before, after int64
}
var memberNamePool = sync.Pool{New: func() any { return new([]memberName) }}
func getMemberNames() *[]memberName {
ns := memberNamePool.Get().(*[]memberName)
*ns = (*ns)[:0]
return ns
}
func putMemberNames(ns *[]memberName) {
if cap(*ns) < 1<<10 {
clear(*ns) // avoid pinning name
memberNamePool.Put(ns)
}
}
// reorderObjects recursively reorders all object members in place
// according to the ordering specified in RFC 8785, section 3.2.3.
//
// Pre-conditions:
// - The value is valid (i.e., no decoder errors should ever occur).
// - The value is compact (i.e., no whitespace is present).
// - Initial call is provided a Decoder reading from the start of v.
//
// Post-conditions:
// - Exactly one JSON value is read from the Decoder.
// - All fully-parsed JSON objects are reordered by directly moving
// the members in the value buffer.
//
// The runtime is approximately O(n·log(n)) + O(m·log(m)),
// where n is len(v) and m is the total number of object members.
func reorderObjects(d *Decoder, scratch *[]byte) {
switch tok, _ := d.ReadToken(); tok.Kind() {
case '{':
// Iterate and collect the name and offsets for every object member.
members := getMemberNames()
defer putMemberNames(members)
var prevName []byte
isSorted := true
beforeBody := d.InputOffset() // offset after '{'
for d.PeekKind() != '}' {
beforeName := d.InputOffset()
var flags jsonwire.ValueFlags
name, _ := d.s.ReadValue(&flags)
name = jsonwire.UnquoteMayCopy(name, flags.IsVerbatim())
reorderObjects(d, scratch)
afterValue := d.InputOffset()
if isSorted && len(*members) > 0 {
isSorted = jsonwire.CompareUTF16(prevName, []byte(name)) < 0
}
*members = append(*members, memberName{name, beforeName, afterValue})
prevName = name
}
afterBody := d.InputOffset() // offset before '}'
d.ReadToken()
// Sort the members; return early if it's already sorted.
if isSorted {
return
}
slices.SortFunc(*members, func(x, y memberName) int {
return jsonwire.CompareUTF16(x.name, y.name)
})
// Append the reordered members to a new buffer,
// then copy the reordered members back over the original members.
// Avoid swapping in place since each member may be a different size
// where moving a member over a smaller member may corrupt the data
// for subsequent members before they have been moved.
//
// The following invariant must hold:
// sum([m.after-m.before for m in members]) == afterBody-beforeBody
sorted := (*scratch)[:0]
for i, member := range *members {
if d.s.buf[member.before] == ',' {
member.before++ // trim leading comma
}
sorted = append(sorted, d.s.buf[member.before:member.after]...)
if i < len(*members)-1 {
sorted = append(sorted, ',') // append trailing comma
}
}
if int(afterBody-beforeBody) != len(sorted) {
panic("BUG: length invariant violated")
}
copy(d.s.buf[beforeBody:afterBody], sorted)
// Update scratch buffer to the largest amount ever used.
if len(sorted) > len(*scratch) {
*scratch = sorted
}
case '[':
for d.PeekKind() != ']' {
reorderObjects(d, scratch)
}
d.ReadToken()
}
}