maintain a set of existing target nodes to deduplicate

The old implementation used nested loops to check whether each of
the new nodes was already among the existing nodes. This commit
maintains a map[*html.Node]bool so that, once the set is built from
the existing nodes, only a single loop over the new elements is
required. This is slower for small slices though, and allocates more.
The next step is for appendWithoutDuplicates to accept a map as an
argument so that the set is not recreated on each call from mapNodes.
This commit is contained in:
Martin Angers
2016-08-27 18:55:21 -04:00
parent ad8b3d8222
commit c7533ba696
3 changed files with 176 additions and 1 deletions

85
bench/v1.0.0-go1.7 Normal file
View File

@@ -0,0 +1,85 @@
BenchmarkFirst-4 30000000 50.7 ns/op 48 B/op 1 allocs/op
BenchmarkLast-4 30000000 50.9 ns/op 48 B/op 1 allocs/op
BenchmarkEq-4 30000000 55.7 ns/op 48 B/op 1 allocs/op
BenchmarkSlice-4 500000000 3.45 ns/op 0 B/op 0 allocs/op
BenchmarkGet-4 2000000000 1.68 ns/op 0 B/op 0 allocs/op
BenchmarkIndex-4 3000000 541 ns/op 248 B/op 10 allocs/op
BenchmarkIndexSelector-4 200000 10749 ns/op 2464 B/op 17 allocs/op
BenchmarkIndexOfNode-4 200000000 6.47 ns/op 0 B/op 0 allocs/op
BenchmarkIndexOfSelection-4 200000000 7.27 ns/op 0 B/op 0 allocs/op
BenchmarkMetalReviewExample-4 10000 138426 ns/op 12240 B/op 319 allocs/op
BenchmarkAdd-4 200000 10192 ns/op 208 B/op 9 allocs/op
BenchmarkAddSelection-4 10000000 158 ns/op 48 B/op 1 allocs/op
BenchmarkAddNodes-4 10000000 156 ns/op 48 B/op 1 allocs/op
BenchmarkAndSelf-4 1000000 1588 ns/op 1008 B/op 5 allocs/op
BenchmarkFilter-4 100000 20427 ns/op 360 B/op 8 allocs/op
BenchmarkNot-4 100000 23508 ns/op 136 B/op 5 allocs/op
BenchmarkFilterFunction-4 50000 34178 ns/op 22976 B/op 755 allocs/op
BenchmarkNotFunction-4 50000 38173 ns/op 29120 B/op 757 allocs/op
BenchmarkFilterNodes-4 50000 34001 ns/op 20960 B/op 749 allocs/op
BenchmarkNotNodes-4 30000 40344 ns/op 29120 B/op 757 allocs/op
BenchmarkFilterSelection-4 50000 33308 ns/op 20960 B/op 749 allocs/op
BenchmarkNotSelection-4 30000 40748 ns/op 29120 B/op 757 allocs/op
BenchmarkHas-4 5000 263346 ns/op 1816 B/op 48 allocs/op
BenchmarkHasNodes-4 10000 160840 ns/op 21184 B/op 752 allocs/op
BenchmarkHasSelection-4 10000 165410 ns/op 21184 B/op 752 allocs/op
BenchmarkEnd-4 2000000000 1.01 ns/op 0 B/op 0 allocs/op
BenchmarkEach-4 300000 4664 ns/op 3304 B/op 118 allocs/op
BenchmarkMap-4 200000 8286 ns/op 5572 B/op 184 allocs/op
BenchmarkEachWithBreak-4 2000000 806 ns/op 560 B/op 20 allocs/op
BenchmarkAttr-4 100000000 21.6 ns/op 0 B/op 0 allocs/op
BenchmarkText-4 200000 8909 ns/op 7536 B/op 110 allocs/op
BenchmarkLength-4 2000000000 0.34 ns/op 0 B/op 0 allocs/op
BenchmarkHtml-4 3000000 422 ns/op 120 B/op 2 allocs/op
BenchmarkIs-4 100000 22615 ns/op 88 B/op 4 allocs/op
BenchmarkIsPositional-4 50000 26655 ns/op 1112 B/op 10 allocs/op
BenchmarkIsFunction-4 1000000 1208 ns/op 784 B/op 28 allocs/op
BenchmarkIsSelection-4 50000 33497 ns/op 20960 B/op 749 allocs/op
BenchmarkIsNodes-4 50000 33572 ns/op 20960 B/op 749 allocs/op
BenchmarkHasClass-4 10000 232802 ns/op 14944 B/op 976 allocs/op
BenchmarkContains-4 200000000 7.33 ns/op 0 B/op 0 allocs/op
BenchmarkFind-4 200000 10715 ns/op 2464 B/op 17 allocs/op
BenchmarkFindWithinSelection-4 50000 35878 ns/op 2176 B/op 78 allocs/op
BenchmarkFindSelection-4 10000 194356 ns/op 2672 B/op 82 allocs/op
BenchmarkFindNodes-4 10000 195510 ns/op 2672 B/op 82 allocs/op
BenchmarkContents-4 1000000 2252 ns/op 864 B/op 34 allocs/op
BenchmarkContentsFiltered-4 500000 3015 ns/op 1016 B/op 39 allocs/op
BenchmarkChildren-4 5000000 364 ns/op 152 B/op 7 allocs/op
BenchmarkChildrenFiltered-4 1000000 2212 ns/op 352 B/op 15 allocs/op
BenchmarkParent-4 50000 24643 ns/op 4048 B/op 381 allocs/op
BenchmarkParentFiltered-4 50000 25967 ns/op 4248 B/op 388 allocs/op
BenchmarkParents-4 30000 50000 ns/op 27776 B/op 830 allocs/op
BenchmarkParentsFiltered-4 30000 53107 ns/op 28360 B/op 838 allocs/op
BenchmarkParentsUntil-4 100000 22423 ns/op 10352 B/op 353 allocs/op
BenchmarkParentsUntilSelection-4 20000 86925 ns/op 51144 B/op 1516 allocs/op
BenchmarkParentsUntilNodes-4 20000 87597 ns/op 51144 B/op 1516 allocs/op
BenchmarkParentsFilteredUntil-4 300000 5568 ns/op 2232 B/op 86 allocs/op
BenchmarkParentsFilteredUntilSelection-4 200000 10966 ns/op 5440 B/op 190 allocs/op
BenchmarkParentsFilteredUntilNodes-4 200000 10919 ns/op 5440 B/op 190 allocs/op
BenchmarkSiblings-4 30000 46018 ns/op 15400 B/op 204 allocs/op
BenchmarkSiblingsFiltered-4 30000 50566 ns/op 16496 B/op 213 allocs/op
BenchmarkNext-4 200000 7921 ns/op 3216 B/op 112 allocs/op
BenchmarkNextFiltered-4 200000 8804 ns/op 3416 B/op 118 allocs/op
BenchmarkNextAll-4 50000 31098 ns/op 9912 B/op 138 allocs/op
BenchmarkNextAllFiltered-4 50000 34677 ns/op 11008 B/op 147 allocs/op
BenchmarkPrev-4 200000 7920 ns/op 3216 B/op 112 allocs/op
BenchmarkPrevFiltered-4 200000 8913 ns/op 3416 B/op 118 allocs/op
BenchmarkPrevAll-4 200000 10845 ns/op 4376 B/op 113 allocs/op
BenchmarkPrevAllFiltered-4 100000 12030 ns/op 4576 B/op 119 allocs/op
BenchmarkNextUntil-4 100000 19193 ns/op 5760 B/op 260 allocs/op
BenchmarkNextUntilSelection-4 50000 34829 ns/op 18480 B/op 542 allocs/op
BenchmarkNextUntilNodes-4 100000 14459 ns/op 7944 B/op 248 allocs/op
BenchmarkPrevUntil-4 20000 66296 ns/op 12856 B/op 448 allocs/op
BenchmarkPrevUntilSelection-4 30000 45037 ns/op 23432 B/op 689 allocs/op
BenchmarkPrevUntilNodes-4 200000 11525 ns/op 6152 B/op 203 allocs/op
BenchmarkNextFilteredUntil-4 100000 12940 ns/op 4512 B/op 173 allocs/op
BenchmarkNextFilteredUntilSelection-4 50000 38924 ns/op 19160 B/op 567 allocs/op
BenchmarkNextFilteredUntilNodes-4 50000 38528 ns/op 19160 B/op 567 allocs/op
BenchmarkPrevFilteredUntil-4 100000 12980 ns/op 4664 B/op 175 allocs/op
BenchmarkPrevFilteredUntilSelection-4 50000 39671 ns/op 19936 B/op 587 allocs/op
BenchmarkPrevFilteredUntilNodes-4 50000 39484 ns/op 19936 B/op 587 allocs/op
BenchmarkClosest-4 500000 3310 ns/op 160 B/op 8 allocs/op
BenchmarkClosestSelection-4 5000000 361 ns/op 96 B/op 6 allocs/op
BenchmarkClosestNodes-4 5000000 359 ns/op 96 B/op 6 allocs/op
PASS
ok github.com/PuerkitoBio/goquery 163.718s

85
bench/v1.0.1a-go1.7 Normal file
View File

@@ -0,0 +1,85 @@
BenchmarkFirst-4 30000000 50.9 ns/op 48 B/op 1 allocs/op
BenchmarkLast-4 30000000 50.0 ns/op 48 B/op 1 allocs/op
BenchmarkEq-4 30000000 50.5 ns/op 48 B/op 1 allocs/op
BenchmarkSlice-4 500000000 3.53 ns/op 0 B/op 0 allocs/op
BenchmarkGet-4 2000000000 1.66 ns/op 0 B/op 0 allocs/op
BenchmarkIndex-4 2000000 832 ns/op 248 B/op 10 allocs/op
BenchmarkIndexSelector-4 100000 16073 ns/op 3839 B/op 21 allocs/op
BenchmarkIndexOfNode-4 200000000 6.38 ns/op 0 B/op 0 allocs/op
BenchmarkIndexOfSelection-4 200000000 7.14 ns/op 0 B/op 0 allocs/op
BenchmarkMetalReviewExample-4 10000 140737 ns/op 12418 B/op 320 allocs/op
BenchmarkAdd-4 100000 13162 ns/op 974 B/op 10 allocs/op
BenchmarkAddSelection-4 500000 3160 ns/op 814 B/op 2 allocs/op
BenchmarkAddNodes-4 500000 3159 ns/op 814 B/op 2 allocs/op
BenchmarkAndSelf-4 200000 7423 ns/op 2404 B/op 9 allocs/op
BenchmarkFilter-4 100000 19671 ns/op 360 B/op 8 allocs/op
BenchmarkNot-4 100000 22577 ns/op 136 B/op 5 allocs/op
BenchmarkFilterFunction-4 50000 33960 ns/op 22976 B/op 755 allocs/op
BenchmarkNotFunction-4 50000 37909 ns/op 29120 B/op 757 allocs/op
BenchmarkFilterNodes-4 50000 34196 ns/op 20960 B/op 749 allocs/op
BenchmarkNotNodes-4 30000 40446 ns/op 29120 B/op 757 allocs/op
BenchmarkFilterSelection-4 50000 33091 ns/op 20960 B/op 749 allocs/op
BenchmarkNotSelection-4 30000 40609 ns/op 29120 B/op 757 allocs/op
BenchmarkHas-4 5000 262936 ns/op 2371 B/op 50 allocs/op
BenchmarkHasNodes-4 10000 148631 ns/op 21184 B/op 752 allocs/op
BenchmarkHasSelection-4 10000 153117 ns/op 21184 B/op 752 allocs/op
BenchmarkEnd-4 2000000000 1.02 ns/op 0 B/op 0 allocs/op
BenchmarkEach-4 300000 4653 ns/op 3304 B/op 118 allocs/op
BenchmarkMap-4 200000 8257 ns/op 5572 B/op 184 allocs/op
BenchmarkEachWithBreak-4 2000000 806 ns/op 560 B/op 20 allocs/op
BenchmarkAttr-4 100000000 22.0 ns/op 0 B/op 0 allocs/op
BenchmarkText-4 200000 8913 ns/op 7536 B/op 110 allocs/op
BenchmarkLength-4 2000000000 0.35 ns/op 0 B/op 0 allocs/op
BenchmarkHtml-4 5000000 398 ns/op 120 B/op 2 allocs/op
BenchmarkIs-4 100000 22392 ns/op 88 B/op 4 allocs/op
BenchmarkIsPositional-4 50000 26259 ns/op 1112 B/op 10 allocs/op
BenchmarkIsFunction-4 1000000 1212 ns/op 784 B/op 28 allocs/op
BenchmarkIsSelection-4 50000 33222 ns/op 20960 B/op 749 allocs/op
BenchmarkIsNodes-4 50000 33408 ns/op 20960 B/op 749 allocs/op
BenchmarkHasClass-4 10000 233208 ns/op 14944 B/op 976 allocs/op
BenchmarkContains-4 200000000 7.57 ns/op 0 B/op 0 allocs/op
BenchmarkFind-4 100000 16121 ns/op 3839 B/op 21 allocs/op
BenchmarkFindWithinSelection-4 20000 68019 ns/op 11521 B/op 97 allocs/op
BenchmarkFindSelection-4 5000 387582 ns/op 59787 B/op 176 allocs/op
BenchmarkFindNodes-4 5000 389246 ns/op 59797 B/op 176 allocs/op
BenchmarkContents-4 200000 11475 ns/op 2878 B/op 42 allocs/op
BenchmarkContentsFiltered-4 200000 11222 ns/op 2498 B/op 46 allocs/op
BenchmarkChildren-4 2000000 650 ns/op 152 B/op 7 allocs/op
BenchmarkChildrenFiltered-4 500000 2568 ns/op 352 B/op 15 allocs/op
BenchmarkParent-4 2000 702513 ns/op 194478 B/op 828 allocs/op
BenchmarkParentFiltered-4 2000 690778 ns/op 194658 B/op 835 allocs/op
BenchmarkParents-4 10000 124855 ns/op 49869 B/op 868 allocs/op
BenchmarkParentsFiltered-4 10000 128535 ns/op 50456 B/op 876 allocs/op
BenchmarkParentsUntil-4 20000 72982 ns/op 23802 B/op 388 allocs/op
BenchmarkParentsUntilSelection-4 10000 156099 ns/op 72453 B/op 1549 allocs/op
BenchmarkParentsUntilNodes-4 10000 156610 ns/op 72455 B/op 1549 allocs/op
BenchmarkParentsFilteredUntil-4 100000 15549 ns/op 4068 B/op 94 allocs/op
BenchmarkParentsFilteredUntilSelection-4 100000 20564 ns/op 7276 B/op 198 allocs/op
BenchmarkParentsFilteredUntilNodes-4 100000 20635 ns/op 7276 B/op 198 allocs/op
BenchmarkSiblings-4 3000 565114 ns/op 205910 B/op 336 allocs/op
BenchmarkSiblingsFiltered-4 3000 580264 ns/op 206993 B/op 345 allocs/op
BenchmarkNext-4 20000 93177 ns/op 26810 B/op 169 allocs/op
BenchmarkNextFiltered-4 20000 94171 ns/op 27013 B/op 175 allocs/op
BenchmarkNextAll-4 5000 270320 ns/op 89289 B/op 237 allocs/op
BenchmarkNextAllFiltered-4 5000 275283 ns/op 90375 B/op 246 allocs/op
BenchmarkPrev-4 20000 92777 ns/op 26810 B/op 169 allocs/op
BenchmarkPrevFiltered-4 20000 95577 ns/op 27007 B/op 175 allocs/op
BenchmarkPrevAll-4 20000 86339 ns/op 27515 B/op 151 allocs/op
BenchmarkPrevAllFiltered-4 20000 87759 ns/op 27715 B/op 157 allocs/op
BenchmarkNextUntil-4 10000 163930 ns/op 48541 B/op 330 allocs/op
BenchmarkNextUntilSelection-4 30000 56382 ns/op 23880 B/op 556 allocs/op
BenchmarkNextUntilNodes-4 100000 18883 ns/op 8703 B/op 252 allocs/op
BenchmarkPrevUntil-4 3000 484668 ns/op 145402 B/op 611 allocs/op
BenchmarkPrevUntilSelection-4 20000 72125 ns/op 28865 B/op 705 allocs/op
BenchmarkPrevUntilNodes-4 100000 14722 ns/op 6510 B/op 205 allocs/op
BenchmarkNextFilteredUntil-4 50000 39006 ns/op 10990 B/op 192 allocs/op
BenchmarkNextFilteredUntilSelection-4 20000 66048 ns/op 25641 B/op 586 allocs/op
BenchmarkNextFilteredUntilNodes-4 20000 65314 ns/op 25640 B/op 586 allocs/op
BenchmarkPrevFilteredUntil-4 50000 33312 ns/op 9709 B/op 189 allocs/op
BenchmarkPrevFilteredUntilSelection-4 20000 64197 ns/op 24981 B/op 601 allocs/op
BenchmarkPrevFilteredUntilNodes-4 20000 64505 ns/op 24982 B/op 601 allocs/op
BenchmarkClosest-4 500000 4065 ns/op 160 B/op 8 allocs/op
BenchmarkClosestSelection-4 2000000 756 ns/op 96 B/op 6 allocs/op
BenchmarkClosestNodes-4 2000000 753 ns/op 96 B/op 6 allocs/op
PASS
ok github.com/PuerkitoBio/goquery 162.053s

View File

@@ -114,9 +114,14 @@ func indexInSlice(slice []*html.Node, node *html.Node) int {
// contain duplicates. The target slice is returned because append() may create
// a new underlying array.
func appendWithoutDuplicates(target []*html.Node, nodes []*html.Node) []*html.Node {
targetSet := make(map[*html.Node]bool, len(target))
for _, n := range target {
targetSet[n] = true
}
for _, n := range nodes {
if !isInSlice(target, n) {
if !targetSet[n] {
target = append(target, n)
targetSet[n] = true
}
}