add NodeName and OuterHtml utility functions

This commit is contained in:
Martin Angers
2016-02-02 12:28:50 -05:00
parent 417cce822c
commit b0437209ef
4 changed files with 195 additions and 1 deletions

View File

@@ -30,6 +30,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.**
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (spelled `OuterHtml`, with a lowercase "tml", for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
@@ -56,6 +57,8 @@ jQuery often has many variants for the same function (no argument, a selector st
* The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`)
* The goquery methods that can be called with a selector string have a corresponding version that takes a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`)
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
The complete [godoc reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details.

7
doc.go
View File

@@ -35,7 +35,7 @@ Also, because the net/html parser requires UTF-8 encoding, so does goquery: it i
the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML.
See the repository's wiki for various options on how to do this.
Syntax-wise, it is as close as possible to jQuery, with the same function names when
Syntax-wise, it is as close as possible to jQuery, with the same method names when
possible, and that warm and fuzzy chainable interface. jQuery being the
ultra-popular library that it is, writing a similar HTML-manipulating
library was better to follow its API than to start anew (in the same spirit as
@@ -114,5 +114,10 @@ The three dots (...) indicate that various "overloads" are available.
- Document
- Selection
- Matcher
* utilities.go : definition of helper functions (and not methods on a *Selection)
that are not part of jQuery, but are useful to goquery.
- NodeName
- OuterHtml
*/
package goquery

View File

@@ -1,9 +1,67 @@
package goquery
import (
"bytes"
"golang.org/x/net/html"
)
// nodeNames holds the names reported by NodeName for the node types that
// have a fixed name. It is indexed by html.NodeType; html.ElementNode and
// html.DoctypeNode have no entry here because NodeName returns the node's
// Data for those types.
var nodeNames = []string{
html.ErrorNode: "#error",
html.TextNode: "#text",
html.DocumentNode: "#document",
html.CommentNode: "#comment",
}
// NodeName returns the node name of the first node in the selection, or an
// empty string if the selection is empty.
// It tries to behave in a similar way as the DOM's nodeName property
// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
//
// Go's net/html package defines the following node types, listed with
// the corresponding returned value from this function:
//
//	ErrorNode    : #error
//	TextNode     : #text
//	DocumentNode : #document
//	ElementNode  : the element's tag name
//	CommentNode  : #comment
//	DoctypeNode  : the name of the document type
//
// Any node type without an entry in nodeNames yields an empty string.
func NodeName(s *Selection) string {
	if s.Length() == 0 {
		return ""
	}
	switch n := s.Get(0); n.Type {
	case html.ElementNode, html.DoctypeNode:
		return n.Data
	default:
		// html.NodeType is an unsigned integer type, so only the upper
		// bound has to be checked before indexing the lookup table.
		if int(n.Type) < len(nodeNames) {
			return nodeNames[n.Type]
		}
		return ""
	}
}
// OuterHtml renders the first node of the selection as HTML, including
// the node itself (for an element, its tag and attributes) and not only
// its content. An empty selection yields an empty string and a nil error;
// a rendering failure is returned as the error.
//
// Unlike Html, this is a function and not a method on the Selection,
// because this is not a jQuery method (in javascript-land, this is
// a property provided by the DOM).
func OuterHtml(s *Selection) (string, error) {
	if s.Length() == 0 {
		return "", nil
	}

	var out bytes.Buffer
	if err := html.Render(&out, s.Get(0)); err != nil {
		return "", err
	}
	return out.String(), nil
}
func getChildren(n *html.Node) (result []*html.Node) {
for c := n.FirstChild; c != nil; c = c.NextSibling {
result = append(result, c)

128
utilities_test.go Normal file
View File

@@ -0,0 +1,128 @@
package goquery
import (
"reflect"
"sort"
"strings"
"testing"
"golang.org/x/net/html"
)
// allNodes is the HTML fixture parsed by the tests in this file. It contains
// a doctype, several elements (two of them sharing the "header" class), a
// comment and a text node.
var allNodes = `<!doctype html>
<html>
<head>
<meta a="b">
</head>
<body>
<p><!-- this is a comment -->
This is some text.
</p>
<div></div>
<h1 class="header"></h1>
<h2 class="header"></h2>
</body>
</html>`
// TestNodeName checks NodeName against one node of each type present in the
// allNodes fixture: document, doctype, element, comment and text. Each case
// also asserts the node's type, to make sure the fixture was navigated to
// the intended node.
func TestNodeName(t *testing.T) {
	doc, err := NewDocumentFromReader(strings.NewReader(allNodes))
	if err != nil {
		t.Fatal(err)
	}

	root := doc.Nodes[0]
	meta := doc.Find("meta").Get(0)
	para := doc.Find("p").Get(0)
	comment := para.FirstChild

	cases := []struct {
		node *html.Node
		typ  html.NodeType
		want string
	}{
		{node: root, typ: html.DocumentNode, want: nodeNames[html.DocumentNode]},
		{node: root.FirstChild, typ: html.DoctypeNode, want: "html"},
		{node: meta, typ: html.ElementNode, want: "meta"},
		{node: para, typ: html.ElementNode, want: "p"},
		{node: comment, typ: html.CommentNode, want: nodeNames[html.CommentNode]},
		{node: comment.NextSibling, typ: html.TextNode, want: nodeNames[html.TextNode]},
	}

	for i, c := range cases {
		if c.node.Type != c.typ {
			t.Errorf("%d: want type %v, got %v", i, c.typ, c.node.Type)
		}
		if got := NodeName(newSingleSelection(c.node, doc)); got != c.want {
			t.Errorf("%d: want %q, got %q", i, c.want, got)
		}
	}
}
// TestNodeNameMultiSel selects several different elements at once, collects
// the NodeName of every selection passed to the Each callback, and checks
// that the collected names are exactly the tag names that were selected.
func TestNodeNameMultiSel(t *testing.T) {
	doc, err := NewDocumentFromReader(strings.NewReader(allNodes))
	if err != nil {
		t.Fatal(err)
	}

	in := []string{"p", "h1", "div"}
	var out []string
	doc.Find(strings.Join(in, ", ")).Each(func(i int, s *Selection) {
		out = append(out, NodeName(s))
	})

	// Sort both sides so the comparison does not depend on the order in
	// which the matches are visited.
	sort.Strings(in)
	sort.Strings(out)
	if !reflect.DeepEqual(in, out) {
		// Errorf with arguments: the original passed the format string to
		// t.Error without values, so failures printed a literal "%v".
		t.Errorf("want %v, got %v", in, out)
	}
}
// TestOuterHtml checks the rendering produced by OuterHtml for a doctype,
// elements, a comment and a text node taken from the allNodes fixture.
// A case provides either a ready-made selection (sel) or a single node
// (node) that is wrapped into a selection before the call. The last case
// uses a selector matching two elements and expects only the first one to
// be rendered.
func TestOuterHtml(t *testing.T) {
	doc, err := NewDocumentFromReader(strings.NewReader(allNodes))
	if err != nil {
		t.Fatal(err)
	}

	doctype := doc.Nodes[0].FirstChild
	meta := doc.Find("meta")
	para := doc.Find("p")
	comment := para.Get(0).FirstChild
	text := comment.NextSibling
	headers := doc.Find(".header")

	cases := []struct {
		node *html.Node
		sel  *Selection
		want string
	}{
		{node: doctype, want: "<!DOCTYPE html>"}, // render makes DOCTYPE all caps
		{sel: meta, want: `<meta a="b"/>`},       // and auto-closes the meta
		{sel: para, want: `<p><!-- this is a comment -->
This is some text.
</p>`},
		{node: comment, want: "<!-- this is a comment -->"},
		{node: text, want: `
This is some text.
`},
		{sel: headers, want: `<h1 class="header"></h1>`},
	}

	for i, c := range cases {
		target := c.sel
		if target == nil {
			target = newSingleSelection(c.node, doc)
		}

		got, err := OuterHtml(target)
		if err != nil {
			t.Fatal(err)
		}
		if got != c.want {
			t.Errorf("%d: want %q, got %q", i, c.want, got)
		}
	}
}