diff --git a/utilities.go b/utilities.go
index b4c061a..5ff58bd 100644
--- a/utilities.go
+++ b/utilities.go
@@ -18,6 +18,62 @@ var nodeNames = []string{
html.CommentNode: "#comment",
}
+// PathForNode computes the location of n inside its document tree as a
+// slice of int indices, one per tree level, ordered from the root level
+// down to n itself. A nil node yields a nil slice.
+func PathForNode(n *html.Node) []int {
+	var path []int
+	// climb towards the root, noting how many siblings precede each node
+	for cur := n; cur != nil; cur = cur.Parent {
+		pos := 0
+		for sib := cur.PrevSibling; sib != nil; sib = sib.PrevSibling {
+			pos++
+		}
+		path = append(path, pos)
+	}
+	// indices were collected leaf-first; mirror them into root-first order
+	for i, last := 0, len(path)-1; i < last-i; i++ {
+		path[i], path[last-i] = path[last-i], path[i]
+	}
+	return path
+}
+
+// NodeAtPath returns the HTML node at the specified path in the
+// document tree of the specified n node. The path is followed from
+// the root of the tree: path[0] selects among the root-level siblings
+// and each later index selects a child one level further down. An empty
+// path yields the first root-level node. If n is nil, an index is
+// negative, or the path leads past the end of the tree, nil is returned.
+func NodeAtPath(path []int, n *html.Node) *html.Node {
+	if n == nil {
+		return nil
+	}
+
+	// start at root: topmost ancestor, then rewind to its first sibling
+	for n.Parent != nil {
+		n = n.Parent
+	}
+	for n.PrevSibling != nil {
+		n = n.PrevSibling
+	}
+
+	for depth, ix := range path {
+		if ix < 0 { // no node can sit at a negative index
+			return nil
+		}
+		if depth > 0 {
+			n = n.FirstChild
+		}
+		for ; n != nil && ix > 0; ix-- {
+			n = n.NextSibling
+		}
+		if n == nil {
+			return nil
+		}
+	}
+	return n
+}
+
// NodeName returns the node name of the first element in the selection.
// It tries to behave in a similar way as the DOM's nodeName property
// (https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName).
diff --git a/utilities_test.go b/utilities_test.go
index c8e9d54..733abab 100644
--- a/utilities_test.go
+++ b/utilities_test.go
@@ -9,6 +9,91 @@ import (
"golang.org/x/net/html"
)
+var invalidPathNodes = []struct { // cases for which TestNodeAtPath expects NodeAtPath to return nil
+ in string // HTML source handed to html.Parse; NOTE(review): "" in every row — confirm the markup was not stripped
+ path []int // index path that is expected not to resolve to any node
+}{
+ {"", []int{0, 1, 2}},
+ {"", []int{0, 0, 1, 2, 0}},
+ {"", []int{1}},
+ {"", []int{1, 2}},
+ {"", []int{1, 2, 10}},
+}
+
+var validPathNodes = []struct { // (input, element, expected path) cases shared by TestPathForNode and TestNodeAtPath
+ in string // HTML source to parse; NOTE(review): "" in every row although the expected elements differ — presumably the markup was stripped, confirm
+ el string // passed to doc.Find in TestPathForNode and compared with the node's Data in TestNodeAtPath
+ path []int // one index per tree level, root level first (see the per-row breakdown)
+}{
+ {"", "a", []int{0, 0, 1, 0}}, // root html body(1) a
+ {"", "meta", []int{0, 0, 0, 0}}, // root html head meta
+ {"", "title", []int{0, 0, 0, 1}}, // root html head title
+ {"", "div", []int{0, 0, 1, 0}}, // root html body(1) div
+ {"", "p", []int{0, 0, 1, 0, 0}}, // root html body(1) div p
+ {"", "a", []int{0, 0, 1, 0, 1}}, // root html body(1) div a(1)
+ {"", "span", []int{0, 0, 1, 0, 2}}, // root html body(1) div span(2)
+}
+
+// TestPathForNode checks PathForNode against every validPathNodes entry,
+// using the node found via doc.Find(el), and checks that nil maps to nil.
+func TestPathForNode(t *testing.T) {
+	for idx, tc := range validPathNodes {
+		doc, err := NewDocumentFromReader(strings.NewReader(tc.in))
+		if err != nil {
+			t.Errorf("%d: failed to parse: %v", idx, err)
+			continue
+		}
+		// target stays nil when the selection is empty
+		var target *html.Node
+		if matches := doc.Find(tc.el); matches.Length() > 0 {
+			target = matches.Get(0)
+		}
+		if got := PathForNode(target); !reflect.DeepEqual(tc.path, got) {
+			h, _ := OuterHtml(doc.Selection) // error ignored: h only decorates the failure message
+			t.Errorf("%d: want %v, got %v (html: %s)", idx, tc.path, got, h)
+		}
+	}
+
+	// test a nil node
+	if got := PathForNode(nil); got != nil {
+		t.Errorf("want nil for nil node, got %v", got)
+	}
+}
+
+// TestNodeAtPath checks NodeAtPath on valid paths, invalid paths and a nil node.
+func TestNodeAtPath(t *testing.T) {
+	// valid cases
+	for i, c := range validPathNodes {
+		n, err := html.Parse(strings.NewReader(c.in))
+		if err != nil {
+			t.Errorf("%d: failed to parse: %v", i, err)
+			continue
+		}
+		// check for nil before reading Data so an unresolved path fails the test instead of panicking
+		switch nn := NodeAtPath(c.path, n); {
+		case nn == nil:
+			t.Errorf("%d: want element %s, got nil", i, c.el)
+		case nn.Data != c.el:
+			t.Errorf("%d: want element %s, got %s (%v)", i, c.el, nn.Data, nn)
+		}
+	}
+	// invalid cases
+	for i, c := range invalidPathNodes {
+		n, err := html.Parse(strings.NewReader(c.in))
+		if err != nil {
+			t.Errorf("%d: failed to parse: %v", i, err)
+			continue
+		}
+		if got := NodeAtPath(c.path, n); got != nil {
+			t.Errorf("%d: want nil, got %v", i, got)
+		}
+	}
+	// test a nil node
+	if got := NodeAtPath([]int{1, 2, 3}, nil); got != nil {
+		t.Errorf("want nil for nil node, got %v", got)
+	}
+}
+
var allNodes = `