diff --git a/add.go b/add.go new file mode 100644 index 0000000..a502ebf --- /dev/null +++ b/add.go @@ -0,0 +1,14 @@ +package goquery + +// Add appends the nodes matching the selector to the current selection, in place, and returns the same Selection object. +// The selector is run against the root of the Selection's document, not against the current nodes; duplicates are not removed. +func (this *Selection) Add(selector string) *Selection { + this.Nodes = append(this.Nodes, findWithContext(selector, this.document.Root)...) + return this +} + +// AddFromSelection appends the nodes of sel to the current selection, in place, and returns this (the same Selection object). Duplicates are not removed. +func (this *Selection) AddFromSelection(sel *Selection) *Selection { + this.Nodes = append(this.Nodes, sel.Nodes...) + return this +} diff --git a/attr.go b/attr.go new file mode 100644 index 0000000..d91c287 --- /dev/null +++ b/attr.go @@ -0,0 +1,29 @@ +package goquery + +import ( + "exp/html" +) + +// Attr gets the value of the named attribute for only the first node in the Selection; exists is false when the Selection is empty or the attribute is not set. +// To get the value for each element individually, use a looping construct such as the Each() method. +func (this *Selection) Attr(attrName string) (val string, exists bool) { + if len(this.Nodes) == 0 { + return + } + return getAttributeValue(attrName, this.Nodes[0]) +} + +func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) { + if n == nil { + return + } + + for _, a := range n.Attr { + if a.Key == attrName { + val = a.Val + exists = true + return + } + } + return +} diff --git a/children.go b/children.go new file mode 100644 index 0000000..4875550 --- /dev/null +++ b/children.go @@ -0,0 +1,54 @@ +package goquery + +import ( + "code.google.com/p/cascadia" + "exp/html" + "strings" +) + +// Children returns a new Selection object holding every child node of the document root. +func (this *Document) Children() *Selection { + return this.ChildrenFiltered("") +} + +// Children returns a new Selection object holding every child node of each node in this Selection. +func (this *Selection) Children() *Selection { + return this.ChildrenFiltered("") +} + +// ChildrenFiltered returns a new Selection object holding the child nodes of the document root that match the selector (every child when the selector is empty or "*"). 
+func (this *Document) ChildrenFiltered(selector string) *Selection { + return &Selection{childrenWithContext(selector, this.Root), this} +} + +// ChildrenFiltered returns a new Selection object holding the child nodes of this Selection's nodes that match the selector (every child when the selector is empty or "*"). +func (this *Selection) ChildrenFiltered(selector string) *Selection { + return &Selection{childrenWithContext(selector, this.Nodes...), this.document} +} + +func childrenWithContext(selector string, nodes ...*html.Node) []*html.Node { + var matches []*html.Node + var allChildren bool + var sel cascadia.Selector + var e error + + selector = strings.TrimSpace(selector) + if selector == "*" || selector == "" { + // Empty or "*" selector: get all children without compiling anything + allChildren = true + } else { + if sel, e = cascadia.Compile(selector); e != nil { + // Selector doesn't compile, which means empty selection + return nil + } + } + + for _, n := range nodes { + for _, nchild := range n.Child { + if allChildren || sel(nchild) { + matches = append(matches, nchild) + } + } + } + return matches +} diff --git a/document.go b/document.go new file mode 100644 index 0000000..d7e64a0 --- /dev/null +++ b/document.go @@ -0,0 +1,37 @@ +package goquery + +import ( + "exp/html" + "net/http" + "net/url" +) + +type Document struct { + Root *html.Node + Url *url.URL +} + +func NewDocumentFromNode(root *html.Node) (d *Document) { + // Create and fill the document; no URL is set when starting from a node + d = &Document{root, nil} + return +} + +func NewDocument(url string) (d *Document, e error) { + // Load the URL + res, e := http.Get(url) + if e != nil { + return + } + defer res.Body.Close() + + // Parse the HTML response body into nodes + root, e := html.Parse(res.Body) + if e != nil { + return + } + + // Create and fill the document, keeping the URL of the request that produced the response + d = &Document{root, res.Request.URL} + return +} diff --git a/each.go b/each.go new file mode 100644 index 0000000..ece25fb --- /dev/null +++ b/each.go @@ -0,0 +1,13 @@ +package goquery + +import ( + "exp/html" +) + +// Each calls f for each node, with the node's index and a new Selection holding only that node. Returns this (same Selection object) +func (this *Selection) Each(f func(int, *Selection)) *Selection { + for i, n := range this.Nodes { + f(i, 
&Selection{[]*html.Node{n}, this.document}) + } + return this +} diff --git a/filter.go b/filter.go new file mode 100644 index 0000000..d751e5d --- /dev/null +++ b/filter.go @@ -0,0 +1,58 @@ +package goquery + +import ( + "code.google.com/p/cascadia" + "exp/html" +) + +func (this *Selection) Filter(selector string) *Selection { + sel, e := cascadia.Compile(selector) + if e != nil { + // Selector doesn't compile, which means empty selection + return &Selection{nil, this.document} + } + + return &Selection{sel.Filter(this.Nodes), this.document} +} + +func (this *Selection) FilterFunction(f func(int, *Selection) bool) *Selection { + var matches []*html.Node + + // Keep each node for which f returns true; f gets the node's index and a single-node Selection + for i, n := range this.Nodes { + if f(i, &Selection{[]*html.Node{n}, this.document}) { + matches = append(matches, n) + } + } + return &Selection{matches, this.document} +} + +func (this *Selection) FilterNode(node *html.Node) *Selection { + // TODO : Use a Contains() helper on the this.Nodes array: if it contains the node, return a Selection of that node, otherwise an empty one + for _, n := range this.Nodes { + if n == node { + return &Selection{[]*html.Node{n}, this.document} + } + } + return &Selection{nil, this.document} +} + +func (this *Selection) FilterSelection(s *Selection) *Selection { + // TODO : This is exactly an intersection of two Selections (not a union), so maybe call it Intersection(), or have that as a synonym + var matches []*html.Node + + if s == nil { + return &Selection{nil, this.document} + } + + // Keep each node of this Selection that is also found in s + for _, n1 := range this.Nodes { + for _, n2 := range s.Nodes { + if n1 == n2 { + matches = append(matches, n1) + break + } + } + } + return &Selection{matches, this.document} +} diff --git a/find.go b/find.go new file mode 100644 index 0000000..4dca32b --- /dev/null +++ b/find.go @@ -0,0 +1,33 @@ +package goquery + +import ( + "code.google.com/p/cascadia" + "exp/html" +) + +// Find returns a new Selection object holding the selector's matches (cascadia MatchAll) for the document root +func (this *Document) Find(selector string) *Selection { 
+ return &Selection{findWithContext(selector, this.Root), this} +} + +// Find returns a new Selection object holding the selector's matches (cascadia MatchAll) for each node of this Selection +func (this *Selection) Find(selector string) *Selection { + return &Selection{findWithContext(selector, this.Nodes...), this.document} +} + +// findWithContext is the private internal implementation of the various Find() methods: it returns the selector's matches for each given node, concatenated +func findWithContext(selector string, nodes ...*html.Node) []*html.Node { + var matches []*html.Node + + sel, e := cascadia.Compile(selector) + if e != nil { + // Selector doesn't compile, which means empty selection + return nil + } + + // Match the selector on each node and accumulate the results + for _, n := range nodes { + matches = append(matches, sel.MatchAll(n)...) + } + return matches +} diff --git a/first.go b/first.go new file mode 100644 index 0000000..f3f17cb --- /dev/null +++ b/first.go @@ -0,0 +1,13 @@ +package goquery + +import ( + "exp/html" +) + +// First returns a new Selection object holding only the first node, or no node at all if this Selection is empty +func (this *Selection) First() *Selection { + if len(this.Nodes) == 0 { + return &Selection{nil, this.document} + } + return &Selection{[]*html.Node{this.Nodes[0]}, this.document} +} diff --git a/get.go b/get.go new file mode 100644 index 0000000..69e830a --- /dev/null +++ b/get.go @@ -0,0 +1,18 @@ +package goquery + +import ( + "exp/html" +) + +// Get returns the node at the given index (a negative index counts from the end), or nil if the index is out of range. Get() without parameter is not implemented, its behaviour would be exactly the same as getting selection.Nodes +func (this *Selection) Get(index int) *html.Node { + var l = len(this.Nodes) + + if index < 0 { + index += l // Negative index gets from the end + } + if index >= 0 && index < l { + return this.Nodes[index] + } + return nil +} diff --git a/goquery.go b/goquery.go index 0558042..a0f173b 100644 --- a/goquery.go +++ b/goquery.go @@ -1,15 +1,5 @@ package goquery -import ( - "code.google.com/p/cascadia" - "exp/html" - //"fmt" - "net/http" - "net/url" - "regexp" - "strings" -) - // TODO : Benchmarks // TODO : Ensure no node is added more than once in a selection (especially with Add...) 
// TODO : Add the following methods: @@ -21,249 +11,4 @@ import ( // - Eq() // - Find() : Complete with Selection object and Node object as selectors // - Has() - -type Document struct { - Root *html.Node - Url *url.URL -} - -func NewDocumentFromNode(root *html.Node) (d *Document) { - // Create and fill the document - d = &Document{root, nil} - return -} - -func NewDocument(url string) (d *Document, e error) { - // Load the URL - res, e := http.Get(url) - if e != nil { - return - } - defer res.Body.Close() - - // Parse the HTML into nodes - root, e := html.Parse(res.Body) - if e != nil { - return - } - - // Create and fill the document - d = &Document{root, res.Request.URL} - return -} - -type Selection struct { - Nodes []*html.Node - document *Document -} - -// Private internal implementation of the various Find() methods -func findWithContext(selector string, nodes ...*html.Node) []*html.Node { - var matches []*html.Node - - sel, e := cascadia.Compile(selector) - if e != nil { - // Selector doesn't compile, which means empty selection - return nil - } - - // Match the selector on each node - for _, n := range nodes { - matches = append(matches, sel.MatchAll(n)...) 
- } - return matches -} - -func childrenWithContext(selector string, nodes ...*html.Node) []*html.Node { - var matches []*html.Node - var allChildren bool - var sel cascadia.Selector - var e error - - selector = strings.TrimSpace(selector) - if selector == "*" || selector == "" { - // Get all children - allChildren = true - } else { - if sel, e = cascadia.Compile(selector); e != nil { - // Selector doesn't compile, empty selection - return nil - } - } - - for _, n := range nodes { - for _, nchild := range n.Child { - if allChildren || sel(nchild) { - matches = append(matches, nchild) - } - } - } - return matches -} - -// Returns a new Selection object -func (this *Document) Find(selector string) *Selection { - return &Selection{findWithContext(selector, this.Root), this} -} - -// Returns a new Selection object -func (this *Selection) Find(selector string) *Selection { - return &Selection{findWithContext(selector, this.Nodes...), this.document} -} - -// Returns this (same Selection object) -func (this *Selection) Each(f func(int, *Selection)) *Selection { - for i, n := range this.Nodes { - f(i, &Selection{[]*html.Node{n}, this.document}) - } - return this -} - -// Adds matching nodes to the current selection. Returns the same Selection object. -// The new selector string is run in the context of the document of the Selection object. -func (this *Selection) Add(selector string) *Selection { - this.Nodes = append(this.Nodes, findWithContext(selector, this.document.Root)...) - return this -} - -// Adds nodes of the specified Selection object to the current selection. Returns this (the same Selection object). -func (this *Selection) AddFromSelection(sel *Selection) *Selection { - this.Nodes = append(this.Nodes, sel.Nodes...) 
- return this -} - -func getAttributeValue(attrName string, n *html.Node) (val string, exists bool) { - if n == nil { - return - } - - for _, a := range n.Attr { - if a.Key == attrName { - val = a.Val - exists = true - return - } - } - return -} - -// The Attr() method gets the attribute value for only the first element in the Selection. -// To get the value for each element individually, use a looping construct such as Each() or Map() method. -func (this *Selection) Attr(attrName string) (val string, exists bool) { - if len(this.Nodes) == 0 { - return - } - return getAttributeValue(attrName, this.Nodes[0]) -} - -// Returns a new Selection object. -func (this *Document) Children() *Selection { - return this.ChildrenFiltered("") -} - -// Returns a new Selection object. -func (this *Selection) Children() *Selection { - return this.ChildrenFiltered("") -} - -// Returns a new Selection object. -func (this *Document) ChildrenFiltered(selector string) *Selection { - return &Selection{childrenWithContext(selector, this.Root), this} -} - -// Returns a new Selection object. 
-func (this *Selection) ChildrenFiltered(selector string) *Selection { - return &Selection{childrenWithContext(selector, this.Nodes...), this.document} -} - -func (this *Selection) Filter(selector string) *Selection { - sel, e := cascadia.Compile(selector) - if e != nil { - // Selector doesn't compile, which means empty selection - return &Selection{nil, this.document} - } - - return &Selection{sel.Filter(this.Nodes), this.document} -} - -func (this *Selection) FilterFunction(f func(int, *Selection) bool) *Selection { - var matches []*html.Node - - // Check for a match for each current selection - for i, n := range this.Nodes { - if f(i, &Selection{[]*html.Node{n}, this.document}) { - matches = append(matches, n) - } - } - return &Selection{matches, this.document} -} - -func (this *Selection) FilterNode(node *html.Node) *Selection { - // TODO : Use Contains() on the this.Nodes array, if it contains, return node Selection, otherwise empty - for _, n := range this.Nodes { - if n == node { - return &Selection{[]*html.Node{n}, this.document} - } - } - return &Selection{nil, this.document} -} - -func (this *Selection) FilterSelection(s *Selection) *Selection { - // TODO : Exactly an Union() of two Selections, so maybe call it Union(), or have it as synonymous - var matches []*html.Node - - if s == nil { - return &Selection{nil, this.document} - } - - // Check for a match for each current selection - for _, n1 := range this.Nodes { - for _, n2 := range s.Nodes { - if n1 == n2 { - matches = append(matches, n1) - break - } - } - } - return &Selection{matches, this.document} -} - -// Returns a new Selection object -func (this *Selection) First() *Selection { - if len(this.Nodes) == 0 { - return &Selection{nil, this.document} - } - return &Selection{[]*html.Node{this.Nodes[0]}, this.document} -} - -// Get() without parameter is not implemented, its behaviour would be exactly the same as getting selection.Nodes -func (this *Selection) Get(index int) *html.Node { - var l = 
len(this.Nodes) - - if index < 0 { - index += l // Negative index gets from the end - } - if index >= 0 && index < l { - return this.Nodes[index] - } - return nil -} - -// Returns true if at least one node in the selection has the given class -func (this *Selection) HasClass(class string) bool { - var rx = regexp.MustCompile("[\t\r\n]") - - class = " " + class + " " - for _, n := range this.Nodes { - // Applies only to element nodes - if n.Type == html.ElementNode { - if elClass, ok := getAttributeValue("class", n); ok { - elClass = rx.ReplaceAllString(" "+elClass+" ", " ") - if strings.Index(elClass, class) > -1 { - return true - } - } - } - } - return false -} +// - Html() ? diff --git a/hasclass.go b/hasclass.go new file mode 100644 index 0000000..ba89293 --- /dev/null +++ b/hasclass.go @@ -0,0 +1,26 @@ +package goquery + +import ( + "exp/html" + "regexp" + "strings" +) + +// HasClass returns true if at least one element node in the selection has the given class; tabs, carriage returns and newlines in the class attribute are treated as spaces +func (this *Selection) HasClass(class string) bool { + var rx = regexp.MustCompile("[\t\r\n]") + + class = " " + class + " " + for _, n := range this.Nodes { + // Applies only to element nodes + if n.Type == html.ElementNode { + if elClass, ok := getAttributeValue("class", n); ok { + elClass = rx.ReplaceAllString(" "+elClass+" ", " ") + if strings.Index(elClass, class) > -1 { + return true + } + } + } + } + return false +} diff --git a/selection.go b/selection.go new file mode 100644 index 0000000..dbd9967 --- /dev/null +++ b/selection.go @@ -0,0 +1,10 @@ +package goquery + +import ( + "exp/html" +) + +type Selection struct { + Nodes []*html.Node + document *Document +}