add NodeName and OuterHtml utility functions

This commit is contained in:
Martin Angers
2016-02-02 12:28:50 -05:00
parent 417cce822c
commit b0437209ef
4 changed files with 195 additions and 1 deletions

View File

@@ -30,6 +30,7 @@ Please note that because of the net/html dependency, goquery requires Go1.1+.
**Note that goquery's API is now stable, and will not break.** **Note that goquery's API is now stable, and will not break.**
* **2016-02-02** : Add `NodeName` utility function similar to the DOM's `nodeName` property. It returns the tag name of the first element in a selection, and other relevant values of non-element nodes (see godoc for details). Add `OuterHtml` utility function similar to the DOM's `outerHTML` property (named `OuterHtml` in small caps for consistency with the existing `Html` method on the `Selection`).
* **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr]. * **2015-04-20** : Add `AttrOr` helper method to return the attribute's value or a default value if absent. Thanks to [piotrkowalczuk][piotr].
* **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone]. * **2015-02-04** : Add more manipulation functions - Prepend* - thanks again to [Andrew Stone][thatguystone].
* **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone]. * **2014-11-28** : Add more manipulation functions - ReplaceWith*, Wrap* and Unwrap - thanks again to [Andrew Stone][thatguystone].
@@ -56,6 +57,8 @@ jQuery often has many variants for the same function (no argument, a selector st
* The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`) * The signatures accepting a function as argument in jQuery are defined in goquery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`)
* The goquery methods that can be called with a selector string have a corresponding version that take a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`) * The goquery methods that can be called with a selector string have a corresponding version that take a `Matcher` interface and are defined as `XxxMatcher()` (e.g.: `IsMatcher()`)
Utility functions that are not in jQuery but are useful in Go are implemented as functions (that take a `*Selection` as parameter), to avoid a potential naming clash on the `*Selection`'s methods (reserved for jQuery-equivalent behaviour).
The complete [godoc reference documentation can be found here][doc]. The complete [godoc reference documentation can be found here][doc].
Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details. Please note that Cascadia's selectors do not necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details.

7
doc.go
View File

@@ -35,7 +35,7 @@ Also, because the net/html parser requires UTF-8 encoding, so does goquery: it i
the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML. the caller's responsibility to ensure that the source document provides UTF-8 encoded HTML.
See the repository's wiki for various options on how to do this. See the repository's wiki for various options on how to do this.
Syntax-wise, it is as close as possible to jQuery, with the same function names when Syntax-wise, it is as close as possible to jQuery, with the same method names when
possible, and that warm and fuzzy chainable interface. jQuery being the possible, and that warm and fuzzy chainable interface. jQuery being the
ultra-popular library that it is, writing a similar HTML-manipulating ultra-popular library that it is, writing a similar HTML-manipulating
library was better to follow its API than to start anew (in the same spirit as library was better to follow its API than to start anew (in the same spirit as
@@ -114,5 +114,10 @@ The three dots (...) indicate that various "overloads" are available.
- Document - Document
- Selection - Selection
- Matcher - Matcher
* utilities.go : definition of helper functions (and not methods on a *Selection)
that are not part of jQuery, but are useful to goquery.
- NodeName
- OuterHtml
*/ */
package goquery package goquery

View File

@@ -1,9 +1,67 @@
package goquery package goquery
import ( import (
"bytes"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
var nodeNames = []string{
html.ErrorNode: "#error",
html.TextNode: "#text",
html.DocumentNode: "#document",
html.CommentNode: "#comment",
}
// NodeName returns the node name of the first element in the selection.
// It tries to behave in a similar way as the DOM's nodeName property
// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
//
// Go's net/html package defines the following node types, listed with
// the corresponding returned value from this function:
//
// ErrorNode : #error
// TextNode : #text
// DocumentNode : #document
// ElementNode : the element's tag name
// CommentNode : #comment
// DoctypeNode : the name of the document type
//
func NodeName(s *Selection) string {
if s.Length() == 0 {
return ""
}
switch n := s.Get(0); n.Type {
case html.ElementNode, html.DoctypeNode:
return n.Data
default:
if n.Type >= 0 && int(n.Type) < len(nodeNames) {
return nodeNames[n.Type]
}
return ""
}
}
// OuterHtml returns the outer HTML rendering of the first item in
// the selection - that is, the HTML including the first element's
// tag and attributes.
//
// Unlike InnerHtml, this is a function and not a method on the Selection,
// because this is not a jQuery method (in javascript-land, this is
// a property provided by the DOM).
func OuterHtml(s *Selection) (string, error) {
var buf bytes.Buffer
if s.Length() == 0 {
return "", nil
}
n := s.Get(0)
if err := html.Render(&buf, n); err != nil {
return "", err
}
return buf.String(), nil
}
func getChildren(n *html.Node) (result []*html.Node) { func getChildren(n *html.Node) (result []*html.Node) {
for c := n.FirstChild; c != nil; c = c.NextSibling { for c := n.FirstChild; c != nil; c = c.NextSibling {
result = append(result, c) result = append(result, c)

128
utilities_test.go Normal file
View File

@@ -0,0 +1,128 @@
package goquery
import (
"reflect"
"sort"
"strings"
"testing"
"golang.org/x/net/html"
)
var allNodes = `<!doctype html>
<html>
<head>
<meta a="b">
</head>
<body>
<p><!-- this is a comment -->
This is some text.
</p>
<div></div>
<h1 class="header"></h1>
<h2 class="header"></h2>
</body>
</html>`
func TestNodeName(t *testing.T) {
doc, err := NewDocumentFromReader(strings.NewReader(allNodes))
if err != nil {
t.Fatal(err)
}
n0 := doc.Nodes[0]
nDT := n0.FirstChild
sMeta := doc.Find("meta")
nMeta := sMeta.Get(0)
sP := doc.Find("p")
nP := sP.Get(0)
nComment := nP.FirstChild
nText := nComment.NextSibling
cases := []struct {
node *html.Node
typ html.NodeType
want string
}{
{n0, html.DocumentNode, nodeNames[html.DocumentNode]},
{nDT, html.DoctypeNode, "html"},
{nMeta, html.ElementNode, "meta"},
{nP, html.ElementNode, "p"},
{nComment, html.CommentNode, nodeNames[html.CommentNode]},
{nText, html.TextNode, nodeNames[html.TextNode]},
}
for i, c := range cases {
got := NodeName(newSingleSelection(c.node, doc))
if c.node.Type != c.typ {
t.Errorf("%d: want type %v, got %v", i, c.typ, c.node.Type)
}
if got != c.want {
t.Errorf("%d: want %q, got %q", i, c.want, got)
}
}
}
func TestNodeNameMultiSel(t *testing.T) {
doc, err := NewDocumentFromReader(strings.NewReader(allNodes))
if err != nil {
t.Fatal(err)
}
in := []string{"p", "h1", "div"}
var out []string
doc.Find(strings.Join(in, ", ")).Each(func(i int, s *Selection) {
got := NodeName(s)
out = append(out, got)
})
sort.Strings(in)
sort.Strings(out)
if !reflect.DeepEqual(in, out) {
t.Error("want %v, got %v")
}
}
func TestOuterHtml(t *testing.T) {
doc, err := NewDocumentFromReader(strings.NewReader(allNodes))
if err != nil {
t.Fatal(err)
}
n0 := doc.Nodes[0]
nDT := n0.FirstChild
sMeta := doc.Find("meta")
sP := doc.Find("p")
nP := sP.Get(0)
nComment := nP.FirstChild
nText := nComment.NextSibling
sHeaders := doc.Find(".header")
cases := []struct {
node *html.Node
sel *Selection
want string
}{
{nDT, nil, "<!DOCTYPE html>"}, // render makes DOCTYPE all caps
{nil, sMeta, `<meta a="b"/>`}, // and auto-closes the meta
{nil, sP, `<p><!-- this is a comment -->
This is some text.
</p>`},
{nComment, nil, "<!-- this is a comment -->"},
{nText, nil, `
This is some text.
`},
{nil, sHeaders, `<h1 class="header"></h1>`},
}
for i, c := range cases {
if c.sel == nil {
c.sel = newSingleSelection(c.node, doc)
}
got, err := OuterHtml(c.sel)
if err != nil {
t.Fatal(err)
}
if got != c.want {
t.Errorf("%d: want %q, got %q", i, c.want, got)
}
}
}