refactor Children() and Contents() to use mapNodes internally

This commit is contained in:
Martin Angers
2012-09-04 14:56:20 -04:00
parent af4a62048b
commit 6b27ec19a5
3 changed files with 67 additions and 65 deletions

View File

@@ -1,8 +1,8 @@
# goquery - a little like that j-thing, only in Go
GoQuery brings a syntax and features similar to [jQuery][] to the [Go language][go]. It is based on the [experimental html package][exphtml] and the CSS Selector library [cascadia][]. Since the html parser returns tokens (nodes), and not a full-featured DOM object, jQuery's manipulation and modification functions have been left off (no point in modifying data in the parsed tree of the HTML, it has no effect - although it could be used to re-render the HTML from a modified node tree afterwards... maybe someday).
GoQuery brings a syntax and a set of features similar to [jQuery][] to the [Go language][go]. It is based on the [experimental html package][exphtml] and the CSS Selector library [cascadia][]. Since the experimental html parser returns tokens (nodes), and not a full-featured DOM object, jQuery's manipulation and modification functions have been left off (no point in modifying data in the parsed tree of the HTML, it has no effect).
Supported functions are (will be) query-oriented features (`hasClass()`, `attr()` and the likes), as well as traversing functions that make sense given what we have to work with. This makes GoQuery a great library for scraping web pages.
Supported functions are query-oriented features (`hasClass()`, `attr()` and the like), as well as traversing functions that make sense given what we have to work with. This makes GoQuery a great library for scraping web pages.
Syntax-wise, it is as close as possible to jQuery, with the same function names when possible, and that warm and fuzzy chainable interface. jQuery being the ultra-popular library that it is, I felt that a similar HTML-manipulating library was better off following its API than starting anew (in the same spirit as Go's `fmt` package), even though some of its methods are less than intuitive (looking at you, [index()][index]...).
@@ -16,16 +16,30 @@ Once this is done, install GoQuery:
## API
GoQuery exposes two classes, `Document` and `Selection`. Unlike jQuery, which is loaded as part of a DOM document, and thus acts on its containing document, GoQuery doesn't know which HTML document to act upon. So it needs to be told, and that's what the `Document` class is for. It holds the root document node to manipulate, and can make selections on this document.
GoQuery exposes two classes, `Document` and `Selection`. Unlike jQuery, which is loaded as part of a DOM document, and thus acts on its containing document, GoQuery doesn't know which HTML document to act upon. So it needs to be told, and that's what the `Document` class is for. It holds the root document node as the initial Selection object to manipulate.
Please note that Cascadia's selectors do NOT necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details.
jQuery often has many variants for the same function (no argument, a selector string argument, a jQuery object argument, a DOM element argument, ...). Instead of exposing the same features in GoQuery as a single method with variadic empty interface arguments, I use statically-typed signatures following this naming convention:
* When the jQuery equivalent can be called with no argument, it has the same name as jQuery for the no argument signature (e.g.: `Prev()`), and the version with a selector string argument is called `XxxFiltered()` (e.g.: `PrevFiltered()`)
* When the jQuery equivalent **requires** one argument, the same name as jQuery is used for the selector string version (e.g.: `Is()`)
* The signatures accepting a jQuery object as argument are defined in GoQuery as `XxxSelection()` and take a `*Selection` object as argument (e.g.: `FilterSelection()`)
* The signatures accepting a DOM element as argument in jQuery are defined in GoQuery as `XxxNodes()` and take a variadic argument of type `*html.Node` (e.g.: `FilterNodes()`)
* Finally, the signatures accepting a function as argument in jQuery are defined in GoQuery as `XxxFunction()` and take a function as argument (e.g.: `FilterFunction()`)
GoQuery's complete [godoc reference documentation can be found here][doc].
Please note that Cascadia's selectors do NOT necessarily match all supported selectors of jQuery (Sizzle). See the [cascadia project][cascadia] for details.
## Examples
Coming soon...
## TODOs
* Tests to validate that all methods returning a new `*Selection` correctly "roll back" to the previous Selection when calling `.End()`.
* Benchmarks so that future changes have a baseline to compare to.
* Add jQuery's `Closest()`? Other missing functions?
## License
The [BSD 3-Clause license][bsd], the same as the [Go language][golic]. Cascadia's license is [here][caslic].

6
doc.go
View File

@@ -122,7 +122,7 @@ package goquery
// x End()
// x Eq()
// x Filter()
// - Find() : Complete with Selection object and Node object as selectors - Tree Traversal
// x Find() : Complete with Selection object and Node object as selectors - Tree Traversal
// x First()
// x Get()
// x Has()
@@ -134,14 +134,14 @@ package goquery
// x Length() / Size()
// x Map()
// x Next() - Tree traversal
// - NextAll() - Tree traversal
// x NextAll() - Tree traversal
// - NextUntil() - Tree traversal
// x Not()
// x Parent() - Tree traversal
// x Parents() - Tree traversal
// x ParentsUntil() - Tree traversal
// x Prev() - Tree traversal
// - PrevAll() - Tree traversal
// x PrevAll() - Tree traversal
// - PrevUntil() - Tree traversal
// x PushStack()
// x Siblings() - Tree traversal

View File

@@ -13,6 +13,7 @@ const (
siblingAll
siblingNext
siblingNextAll
siblingAllIncludingNonElements
)
// Find() gets the descendants of each element in the current set of matched
@@ -50,7 +51,7 @@ func (this *Selection) FindNodes(nodes ...*html.Node) *Selection {
// including text and comment nodes. It returns a new Selection object
// containing these elements.
func (this *Selection) Contents() *Selection {
// NOTE(review): two return statements follow, so only the first can execute.
// Given the refactor toward mapNodes-based helpers, this looks like the
// pre-refactor line followed by its replacement - confirm which one is meant
// to remain.
return pushStack(this, getSelectionChildren(this, false))
return pushStack(this, getChildrenNodes(this.Nodes, siblingAllIncludingNonElements))
}
// ContentsFiltered() gets the children of each element in the Selection,
@@ -68,15 +69,14 @@ func (this *Selection) ContentsFiltered(selector string) *Selection {
// Children() gets the child elements of each element in the Selection.
// It returns a new Selection object containing these elements.
func (this *Selection) Children() *Selection {
// TODO : Refactor using siblings?
// NOTE(review): two return statements follow, so only the first can execute.
// The second one (getChildrenNodes with siblingAll) looks like the refactored
// replacement for the first - confirm which one is meant to remain.
return pushStack(this, getSelectionChildren(this, true))
return pushStack(this, getChildrenNodes(this.Nodes, siblingAll))
}
// ChildrenFiltered() gets the child elements of each element in the Selection,
// filtered by the specified selector. It returns a new
// Selection object containing these elements.
func (this *Selection) ChildrenFiltered(selector string) *Selection {
// NOTE(review): two return statements follow, so only the first can execute.
// The second one (getChildrenNodes with siblingAll) looks like the refactored
// replacement for the first - confirm which one is meant to remain.
return filterAndPush(this, getSelectionChildren(this, true), selector)
return filterAndPush(this, getChildrenNodes(this.Nodes, siblingAll), selector)
}
// Parent() gets the parent of each element in the Selection. It returns a
@@ -250,17 +250,31 @@ func getParentsNodes(nodes []*html.Node, stopSelector string, stopNodes []*html.
// Internal implementation of sibling nodes that returns a raw slice of matches.
func getSiblingNodes(nodes []*html.Node, st siblingType) []*html.Node {
return mapNodes(nodes, func(i int, n *html.Node) (result []*html.Node) {
var prev *html.Node
var nFound bool
return mapNodes(nodes, func(i int, n *html.Node) []*html.Node {
// Get the parent and loop through all children
if p := n.Parent; p != nil {
for _, c := range p.Child {
return getChildrenWithSiblingType(p, st, n)
}
return nil
})
}
// getChildrenNodes is the internal implementation of child-node lookups that
// returns a raw slice of nodes. Every node in nodes is treated as a parent and
// handed to getChildrenWithSiblingType together with the sibling type st and
// no node to skip; the per-node results are passed through mapNodes.
func getChildrenNodes(nodes []*html.Node, st siblingType) []*html.Node {
	childrenOf := func(_ int, parent *html.Node) []*html.Node {
		return getChildrenWithSiblingType(parent, st, nil)
	}
	return mapNodes(nodes, childrenOf)
}
func getChildrenWithSiblingType(parent *html.Node, st siblingType, skipNode *html.Node) (result []*html.Node) {
var prev *html.Node
var nFound bool
for _, c := range parent.Child {
// Care only about elements
if c.Type == html.ElementNode {
if c.Type == html.ElementNode || st == siblingAllIncludingNonElements {
// Is it the existing node?
if c == n {
if c == skipNode {
// Found the current node
nFound = true
if st == siblingPrev {
@@ -270,7 +284,7 @@ func getSiblingNodes(nodes []*html.Node, st siblingType) []*html.Node {
}
return
}
} else if prev == n && st == siblingNext {
} else if prev == skipNode && st == siblingNext {
// We want only the next node and this is it, so append it and return
result = append(result, c)
return
@@ -280,15 +294,12 @@ func getSiblingNodes(nodes []*html.Node, st siblingType) []*html.Node {
// If child is not the current node, check if sibling type requires
// to add it to the result.
if c != n && (st == siblingAll || (st == siblintPrevAll && !nFound) ||
(st == siblingNextAll && nFound)) {
if c != skipNode && (st == siblingAll || st == siblingAllIncludingNonElements || (st == siblintPrevAll && !nFound) || (st == siblingNextAll && nFound)) {
result = append(result, c)
}
}
}
}
return
})
}
// Internal implementation of parent nodes that returns a raw slice of Nodes.
@@ -333,26 +344,3 @@ func findWithContext(selector string, nodes ...*html.Node) []*html.Node {
}
return matches
}
// getSelectionChildren gathers the children of every node held by the
// Selection s. The children of each node come from getChildren (restricted to
// element nodes when elemOnly is true) and are accumulated with
// appendWithoutDuplicates.
func getSelectionChildren(s *Selection, elemOnly bool) (result []*html.Node) {
	// TODO : Refactor to use mapNodes?
	parents := s.Nodes
	for i := range parents {
		kids := getChildren(parents[i], elemOnly)
		result = appendWithoutDuplicates(result, kids)
	}
	return result
}
// getChildren lists the direct children of n in their existing order. When
// elemOnly is true, only children whose Type is html.ElementNode are kept;
// otherwise every child is kept. A nil n yields a nil slice. Because each
// child is visited once, the result holds no repeated nodes.
func getChildren(n *html.Node, elemOnly bool) (result []*html.Node) {
	if n == nil {
		return nil
	}
	for _, child := range n.Child {
		if elemOnly && child.Type != html.ElementNode {
			// Non-element node (text, comment, ...) and the caller asked
			// for elements only.
			continue
		}
		result = append(result, child)
	}
	return result
}