mirror of
https://github.com/PuerkitoBio/goquery
synced 2025-09-26 21:01:21 +08:00
add PathForNode and NodeAtPath functions
This commit is contained in:
56
utilities.go
56
utilities.go
@@ -18,6 +18,62 @@ var nodeNames = []string{
|
||||
html.CommentNode: "#comment",
|
||||
}
|
||||
|
||||
// PathForNode returns a unique path to retrieve the specified node
|
||||
// from its document tree. The path is a slice of int indices, starting
|
||||
// at the root of the tree.
|
||||
func PathForNode(n *html.Node) []int {
|
||||
var indices []int
|
||||
for n := n; n != nil; n = n.Parent {
|
||||
ix := 0
|
||||
for prev := n.PrevSibling; prev != nil; prev = prev.PrevSibling {
|
||||
ix++
|
||||
}
|
||||
indices = append(indices, ix)
|
||||
}
|
||||
|
||||
// reverse the slice of indices
|
||||
for l, r := 0, len(indices)-1; l < r; l, r = l+1, r-1 {
|
||||
indices[l], indices[r] = indices[r], indices[l]
|
||||
}
|
||||
return indices
|
||||
}
|
||||
|
||||
// NodeAtPath returns the HTML node at the specified path in the
|
||||
// document tree of the specified n node. The path is followed from
|
||||
// the root of the tree. If no node is found by following the path,
|
||||
// nil is returned.
|
||||
func NodeAtPath(path []int, n *html.Node) *html.Node {
|
||||
if n == nil {
|
||||
return n
|
||||
}
|
||||
|
||||
// start at root
|
||||
for n.Parent != nil {
|
||||
n = n.Parent
|
||||
}
|
||||
for n.PrevSibling != nil {
|
||||
n = n.PrevSibling
|
||||
}
|
||||
|
||||
for i, ix := range path {
|
||||
if i > 0 {
|
||||
n = n.FirstChild
|
||||
if n == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
|
||||
for j := 0; j < ix; j++ {
|
||||
n = n.NextSibling
|
||||
if n == nil {
|
||||
return n
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
// NodeName returns the node name of the first element in the selection.
|
||||
// It tries to behave in a similar way as the DOM's nodeName property
|
||||
// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
|
||||
|
@@ -9,6 +9,91 @@ import (
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// invalidPathNodes lists HTML inputs paired with a path that
// TestNodeAtPath expects NodeAtPath to resolve to nil once the input
// has been parsed.
var invalidPathNodes = []struct {
	in   string
	path []int
}{
	{
		in:   "<a>",
		path: []int{0, 1, 2},
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p><a></a><span></span></div></body></html>",
		path: []int{0, 0, 1, 2, 0},
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p><a></a><span></span></div></body></html>",
		path: []int{1},
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p><a></a><span></span></div></body></html>",
		path: []int{1, 2},
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p><a></a><span></span></div></body></html>",
		path: []int{1, 2, 10},
	},
}
|
||||
|
||||
// validPathNodes lists HTML inputs together with the name of an element
// (el) and the path expected to designate the first such element in the
// parsed document. The trailing comments spell out the nodes traversed
// by each path, with the sibling index in brackets when it is not 0.
var validPathNodes = []struct {
	in   string
	el   string
	path []int
}{
	{
		in:   "<a>",
		el:   "a",
		path: []int{0, 0, 1, 0}, // root > html > body[1] > a
	},
	{
		in:   "<html><head><meta></head><body></body></html>",
		el:   "meta",
		path: []int{0, 0, 0, 0}, // root > html > head > meta
	},
	{
		in:   "<html><head><meta><title></title></head><body></body></html>",
		el:   "title",
		path: []int{0, 0, 0, 1}, // root > html > head > title[1]
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p></div></body></html>",
		el:   "div",
		path: []int{0, 0, 1, 0}, // root > html > body[1] > div
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p></div></body></html>",
		el:   "p",
		path: []int{0, 0, 1, 0, 0}, // root > html > body[1] > div > p
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p><a></a><span></span></div></body></html>",
		el:   "a",
		path: []int{0, 0, 1, 0, 1}, // root > html > body[1] > div > a[1]
	},
	{
		in:   "<html><head><meta><title></title></head><body><div><p></p><a></a><span></span></div></body></html>",
		el:   "span",
		path: []int{0, 0, 1, 0, 2}, // root > html > body[1] > div > span[2]
	},
}
|
||||
|
||||
func TestPathForNode(t *testing.T) {
|
||||
for i, c := range validPathNodes {
|
||||
doc, err := NewDocumentFromReader(strings.NewReader(c.in))
|
||||
if err != nil {
|
||||
t.Errorf("%d: failed to parse: %v", i, err)
|
||||
continue
|
||||
}
|
||||
|
||||
var n *html.Node
|
||||
if sel := doc.Find(c.el); sel.Length() > 0 {
|
||||
n = sel.Get(0)
|
||||
}
|
||||
|
||||
got := PathForNode(n)
|
||||
if !reflect.DeepEqual(c.path, got) {
|
||||
h, _ := OuterHtml(doc.Selection)
|
||||
t.Errorf("%d: want %v, got %v (html: %s)", i, c.path, got, h)
|
||||
}
|
||||
}
|
||||
|
||||
// test a nil node
|
||||
if got := PathForNode(nil); got != nil {
|
||||
t.Errorf("want nil for nil node, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeAtPath(t *testing.T) {
|
||||
// valid cases
|
||||
for i, c := range validPathNodes {
|
||||
n, err := html.Parse(strings.NewReader(c.in))
|
||||
if err != nil {
|
||||
t.Errorf("%d: failed to parse: %v", i, err)
|
||||
continue
|
||||
}
|
||||
|
||||
nn := NodeAtPath(c.path, n)
|
||||
if nn.Data != c.el {
|
||||
t.Errorf("%d: want element %s, got %s (%v)", i, c.el, nn.Data, nn)
|
||||
}
|
||||
}
|
||||
|
||||
// invalid cases
|
||||
for i, c := range invalidPathNodes {
|
||||
n, err := html.Parse(strings.NewReader(c.in))
|
||||
if err != nil {
|
||||
t.Errorf("%d: failed to parse: %v", i, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if got := NodeAtPath(c.path, n); got != nil {
|
||||
t.Errorf("%d: want nil, got %v", i, got)
|
||||
}
|
||||
}
|
||||
|
||||
// test a nil node
|
||||
if got := NodeAtPath([]int{1, 2, 3}, nil); got != nil {
|
||||
t.Errorf("want nil for nil node, got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
var allNodes = `<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
|
Reference in New Issue
Block a user