Skip to content

Commit

Permalink
Merge pull request #3 from the-go-tool/rc
Browse files Browse the repository at this point in the history
DuckDuckGo unofficial search provider added
  • Loading branch information
jkulvich authored Jul 25, 2020
2 parents d582d9b + eb00acb commit f523445
Show file tree
Hide file tree
Showing 8 changed files with 225 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ go:
- master

script:
- go test ./
- go test ./...
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@ Qwant, DuckDuckGo and so on.

Supports now:
- [X] Unofficial Qwant
- [ ] Qwant
- [ ] Unofficial DuckDuckGo
- [ ] DuckDuckGo
- [X] Unofficial DuckDuckGo
- [ ] Unofficial Google
- [ ] More: Google, Yandex, Bing, Yahoo etc

## :fast_forward: Fast Start
Expand Down
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module websearch

go 1.14

require golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208
require (
github.com/PuerkitoBio/goquery v1.5.1
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208
)
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,2 +1,12 @@
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 h1:qwRHBd0NqMbJxfbotnDhm2ByMI1Shq4Y6oRJo21SGJA=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
27 changes: 27 additions & 0 deletions helpers/params.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package helpers

import (
	"fmt"
	"net/url"
	"sort"
	"strings"
)

// ParamsMerge combines any number of string-to-string parameter sets into a
// single, newly allocated map. Sets are applied in argument order, so when a
// key occurs in several sets the value from the last one wins. The inputs are
// not modified, and a non-nil map is returned even when no sets are given.
func ParamsMerge(sets ...map[string]string) map[string]string {
	merged := make(map[string]string)
	for _, set := range sets {
		for name, value := range set {
			merged[name] = value
		}
	}
	return merged
}

// ParamsRender renders params as a URL query string of the form
// "k1=v1&k2=v2".
//
// Keys and values are escaped with url.QueryEscape. Pairs are emitted in
// ascending key order so the output is deterministic: Go randomizes map
// iteration order, so ranging over the map directly would produce a
// different string from call to call. A nil or empty map yields "".
func ParamsRender(params map[string]string) string {
	keys := make([]string, 0, len(params))
	for key := range params {
		keys = append(keys, key)
	}
	sort.Strings(keys)

	parts := make([]string, 0, len(keys))
	for _, key := range keys {
		parts = append(parts, fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(params[key])))
	}
	return strings.Join(parts, "&")
}
66 changes: 46 additions & 20 deletions helpers/request.go
Original file line number Diff line number Diff line change
@@ -1,28 +1,53 @@
package helpers

import (
"bytes"
"encoding/json"
"fmt"
"github.com/PuerkitoBio/goquery"
"io/ioutil"
"net/http"
"net/url"
"strings"
)

// Makes HTTP request, handles errs and parses result
func RequestJSON(result interface{}, url url.URL) error {
resp, err := http.Get(url.String())
// Request performs an HTTP request and returns the raw response body.
//
// method is the HTTP verb, url the target and headers optional extra request
// headers (nil is fine). An error is returned when the request cannot be
// built or sent, when the response status is not 2xx, or when reading the
// body fails. The response body is closed on every return path.
func Request(method string, url url.URL, headers map[string]string) ([]byte, error) {
	// NOTE(review): every request, whatever the method, carries this fixed
	// body. It looks like a debugging leftover; it is kept so the bytes sent
	// on the wire do not change - confirm whether it is actually needed.
	req, err := http.NewRequest(method, url.String(), strings.NewReader("q=test&b="))
	if err != nil {
		return nil, err
	}

	// Custom headers
	for key, val := range headers {
		req.Header.Add(key, val)
	}

	// Makes request
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	// Without this the body (and its connection) leaks, including on the
	// status-code error path below.
	defer resp.Body.Close()

	// If status code isn't 2xx
	if resp.StatusCode/100 != http.StatusOK/100 {
		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Answer reading
	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	return data, nil
}

// Makes HTTP request and parses result as JSON
func RequestJSON(result interface{}, url url.URL) error {
data, err := Request("GET", url, nil)
if err != nil {
return err
}
Expand All @@ -35,22 +60,23 @@ func RequestJSON(result interface{}, url url.URL) error {
return nil
}

// Merges string:string sets params
func ParamsMerge(sets ...map[string]string) map[string]string {
result := map[string]string{}
for i := range sets {
for key, val := range sets[i] {
result[key] = val
}
// RequestHTML performs an HTTP request with the given method and URL and
// parses the response body into a goquery document.
//
// The request is sent through Request with fixed "accept" and "user-agent"
// headers. An error from Request or from the HTML parser is returned as-is,
// together with a nil document.
func RequestHTML(method string, url url.URL) (*goquery.Document, error) {
// Browser-like header values; the user-agent string names a desktop Chromium build.
headers := map[string]string{
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/83.0.4103.61 Chrome/83.0.4103.61 Safari/537.36",
}
// NOTE(review): the lines from here through the fmt.Sprintf append look like
// removed-side diff residue (the old ParamsMerge/ParamsRender code), not
// part of RequestHTML - confirm against the real file.
return result
}

// Renders params to query string
func ParamsRender(params map[string]string) string {
parts := make([]string, 0, len(params))
for key, val := range params {
parts = append(parts, fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(val)))
// Fetch the raw response body using the headers built above.
data, err := Request(method, url, headers)
if err != nil {
return nil, err
}
// NOTE(review): the next return also looks like removed-side diff residue.
return strings.Join(parts, "&")

// Load the HTML document
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(data))
if err != nil {
return nil, err
}

return doc, nil
}
116 changes: 116 additions & 0 deletions provider/unofficial_duckduckgo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package provider

import (
"github.com/PuerkitoBio/goquery"
"net/url"
"websearch/helpers"
)

// ProviderUnofficialDuckDuckGo is the provider name of the unofficial
// DuckDuckGo provider, as returned by UnofficialDuckDuckGo.Name.
const ProviderUnofficialDuckDuckGo = ProviderName("unofficial_duckduckgo")

// UnofficialDuckDuckGo is the unofficial DuckDuckGo web search provider.
// It obtains results by requesting and parsing an HTML results page.
type UnofficialDuckDuckGo struct {
// api is the base URL of the search endpoint; each request works on a copy
// of it with its own query string.
api url.URL
}

// UnofficialDuckDuckGoConfig is the configuration for the unofficial
// DuckDuckGo provider. It currently has no fields.
type UnofficialDuckDuckGoConfig struct{}

// NewUnofficialDuckDuckGo builds an unofficial DuckDuckGo search provider
// pointed at https://html.duckduckgo.com/html/. The optional config values
// are accepted but not used.
func NewUnofficialDuckDuckGo(config ...UnofficialDuckDuckGoConfig) UnofficialDuckDuckGo {
	return UnofficialDuckDuckGo{
		api: url.URL{Scheme: "https", Host: "html.duckduckgo.com", Path: "/html/"},
	}
}

// Search runs a web search for query and returns at most count results.
//
// Result pages are fetched one after another until count results have been
// collected. Paging also stops when a page yields no results or no
// parameters for a following page, so fewer than count results are returned
// when the engine runs out of them (instead of looping forever). A count of
// zero or less returns an empty result set without making any request. Any
// request error aborts the search and is returned with nil results.
func (engine UnofficialDuckDuckGo) Search(query string, count int) (Results, error) {
	if count <= 0 {
		// Guards make() and the final slice expression against a negative count.
		return Results{}, nil
	}

	results := make(Results, 0, count)

	// The first page is requested with the query itself; later pages use the
	// form parameters scraped from the previous page.
	params := map[string]string{
		"q": query,
	}
	for len(results) < count {
		page, paramsNext, err := engine.nextSearch(params)
		if err != nil {
			return nil, err
		}
		results = append(results, page...)

		// No progress is possible: stop instead of re-requesting forever.
		if len(page) == 0 || len(paramsNext) == 0 {
			break
		}
		params = paramsNext
	}

	// The last page may overshoot the requested amount.
	if len(results) > count {
		results = results[:count]
	}
	return results, nil
}

// Name returns the provider name, ProviderUnofficialDuckDuckGo.
func (engine UnofficialDuckDuckGo) Name() ProviderName {
return ProviderUnofficialDuckDuckGo
}

// nextSearch requests a single results page and parses it.
//
// form holds the request parameters; they are rendered into the query string
// of a POST request to the provider's API URL. It returns the results found
// on the page and the parameters for the following page, read from the
// inputs of the page's navigation form. The parameter map is empty (never
// nil) when the page has no navigation link. A request or parse error is
// returned with nil results and nil parameters.
func (engine UnofficialDuckDuckGo) nextSearch(form map[string]string) (Results, map[string]string, error) {
	api := engine.api
	api.RawQuery = helpers.ParamsRender(form)

	// Gets response
	doc, err := helpers.RequestHTML("POST", api)
	if err != nil {
		return nil, nil, err
	}

	// Fetching results
	docResults := doc.Find("#links.results .result")
	results := make(Results, 0, docResults.Length())
	docResults.Each(func(_ int, selection *goquery.Selection) {
		anchor := selection.Find(".result__title a")
		// A missing href leaves link empty, which parses to an empty URL.
		link, _ := anchor.Attr("href")

		u, err := url.Parse(link)
		if err != nil {
			// url.Parse returns a nil URL on error; skip the entry rather
			// than dereference it.
			return
		}
		results = append(results, Result{
			Title:       anchor.Text(),
			Description: selection.Find(".result__snippet").Text(),
			Link:        *u,
		})
	})

	// Fetching next page params
	paramsNext := map[string]string{}
	navLinks := doc.Find(".nav-link")
	if navLinks.Length() == 0 {
		// No navigation form on this page: there is no next page to describe,
		// and Get(0) on an empty selection would panic.
		return results, paramsNext, nil
	}
	// With exactly two links the second one is used - presumably the first is
	// "previous" and the second "next"; TODO confirm against the page markup.
	navLink := navLinks.Get(0)
	if navLinks.Length() == 2 {
		navLink = navLinks.Get(1)
	}
	inputs := goquery.NewDocumentFromNode(navLink).Find("form input")
	inputs.Each(func(_ int, selection *goquery.Selection) {
		name, _ := selection.Attr("name")
		value, _ := selection.Attr("value")
		// Inputs without a name cannot be submitted as parameters.
		if len(name) > 0 {
			paramsNext[name] = value
		}
	})

	return results, paramsNext, nil
}
19 changes: 19 additions & 0 deletions provider/unofficial_duckduckgo_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package provider

import "testing"

// TestNewUnofficialDuckDuckGo builds the provider, searches for "test" and
// checks that exactly 65 results come back, each with a non-empty title.
// It calls Search on a real provider, so it is not an isolated unit test.
func TestNewUnofficialDuckDuckGo(t *testing.T) {
	const want = 65

	engine := NewUnofficialDuckDuckGo()
	found, err := engine.Search("test", want)
	if err != nil {
		t.Fatalf("search error: %s", err)
	}
	if got := len(found); got != want {
		t.Fatalf("incorrect results count, expect 65, got %d", got)
	}
	for i := range found {
		if found[i].Title == "" {
			t.Fatalf("empty title")
		}
	}
}

0 comments on commit f523445

Please sign in to comment.