Skip to content

Commit

Permalink
add external probes
Browse files Browse the repository at this point in the history
Signed-off-by: kitfoman <thaddeusgetachew@gmail.com>

make timeout flags backwards compatible

Signed-off-by: kitfoman <thaddeusgetachew@gmail.com>
  • Loading branch information
getact authored and kitfoman committed May 9, 2022
1 parent 1a9ae3a commit e837ac4
Show file tree
Hide file tree
Showing 29 changed files with 607 additions and 185 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,27 @@ and `goldpinger` should show something like this:

![screenshot-DNS-resolution](./extras/dns-screenshot.png)

### TCP and HTTP checks to external targets

Instances can also be configured to do simple TCP or HTTP checks on external targets. This is useful for visualizing more nuanced connectivity flows.

```sh
--tcp-targets= A list of external targets(<host>:<port> or <ip>:<port>) to attempt a TCP check on (space delimited) [$TCP_TARGETS]
--http-targets= A list of external targets(<http or https>://<url>) to attempt an HTTP{S} check on. A 200 HTTP code is considered successful. (space delimited) [$HTTP_TARGETS]
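--tcp-targets-timeout= The timeout for a tcp check on the provided tcp-targets (default: 500ms) [$TCP_TARGETS_TIMEOUT]
--dns-targets-timeout= The timeout for a dns check on the provided dns-targets (default: 500ms) [$DNS_TARGETS_TIMEOUT]
--http-targets-timeout= The timeout for a http check on the provided http-targets (default: 500ms) [$HTTP_TARGETS_TIMEOUT]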
```

```yaml
- name: HTTP_TARGETS
value: http://bloomberg.com
- name: TCP_TARGETS
value: 10.34.5.141:5000 10.34.195.193:6442
```

The timeouts for the TCP, DNS and HTTP checks can be configured via `TCP_TARGETS_TIMEOUT`, `DNS_TARGETS_TIMEOUT` and `HTTP_TARGETS_TIMEOUT` respectively, each given as a duration such as `500ms`.

![screenshot-tcp-http-checks](./extras/tcp-checks-screenshot.png)

## Usage

Expand Down
17 changes: 17 additions & 0 deletions cmd/goldpinger/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,23 @@ func main() {
logger.Error("Unknown IP version specified: expected values are 4 or 6", zap.Strings("IPVersions", goldpinger.GoldpingerConfig.IPVersions))
}

// Handle deprecated flags: when a duration-based timeout is zero, fall back
// to the value of its deprecated millisecond counterpart.
// The durations are compared directly rather than through int(): where int
// is 32 bits wide that conversion truncates, so a non-zero timeout could be
// mistaken for zero and silently overwritten.
if goldpinger.GoldpingerConfig.PingTimeout == 0 {
	logger.Warn("ping-timeout-ms is deprecated in favor of ping-timeout and will be removed in the future",
		zap.Int64("ping-timeout-ms", goldpinger.GoldpingerConfig.PingTimeoutMs))
	goldpinger.GoldpingerConfig.PingTimeout = time.Duration(goldpinger.GoldpingerConfig.PingTimeoutMs) * time.Millisecond
}
if goldpinger.GoldpingerConfig.CheckTimeout == 0 {
	logger.Warn("check-timeout-ms is deprecated in favor of check-timeout and will be removed in the future",
		zap.Int64("check-timeout-ms", goldpinger.GoldpingerConfig.CheckTimeoutMs))
	goldpinger.GoldpingerConfig.CheckTimeout = time.Duration(goldpinger.GoldpingerConfig.CheckTimeoutMs) * time.Millisecond
}
if goldpinger.GoldpingerConfig.CheckAllTimeout == 0 {
	logger.Warn("check-all-timeout-ms is deprecated in favor of check-all-timeout and will be removed in the future",
		zap.Int64("check-all-timeout-ms", goldpinger.GoldpingerConfig.CheckAllTimeoutMs))
	goldpinger.GoldpingerConfig.CheckAllTimeout = time.Duration(goldpinger.GoldpingerConfig.CheckAllTimeoutMs) * time.Millisecond
}

server.ConfigureAPI()
goldpinger.StartUpdater()

Expand Down
Binary file added extras/tcp-checks-screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.15
require (
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 // indirect
github.com/cespare/xxhash v1.1.0
github.com/go-openapi/errors v0.20.0
github.com/go-openapi/errors v0.20.2
github.com/go-openapi/loads v0.19.5
github.com/go-openapi/runtime v0.19.26
github.com/go-openapi/spec v0.19.8
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ github.com/go-openapi/errors v0.19.6/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpX
github.com/go-openapi/errors v0.19.8/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M=
github.com/go-openapi/errors v0.20.0 h1:Sxpo9PjEHDzhs3FbnGNonvDgWcMW2U7wGTcDDSFSceM=
github.com/go-openapi/errors v0.20.0/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M=
github.com/go-openapi/errors v0.20.2 h1:dxy7PGTqEh94zj2E3h1cUmQQWiM1+aeCROfAr02EmK8=
github.com/go-openapi/errors v0.20.2/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M=
github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0=
github.com/go-openapi/jsonpointer v0.17.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M=
github.com/go-openapi/jsonpointer v0.18.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M=
Expand Down
81 changes: 57 additions & 24 deletions pkg/goldpinger/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,12 @@ func CheckNeighbours(ctx context.Context) *models.CheckResults {
// Mux to prevent concurrent map address
checkResultsMux.Lock()
defer checkResultsMux.Unlock()

final := models.CheckResults{}
final.PodResults = make(map[string]models.PodResult)
for podName, podResult := range checkResults.PodResults {
final.PodResults[podName] = podResult
}
if len(GoldpingerConfig.DnsHosts) > 0 {
final.DNSResults = *checkDNS()
}
final.ProbeResults = checkTargets()
return &final
}

Expand Down Expand Up @@ -127,22 +124,58 @@ func pickPodHostIP(podIP, hostIP string) string {
return podIP
}

func checkDNS() *models.DNSResults {
results := models.DNSResults{}
for _, host := range GoldpingerConfig.DnsHosts {
func checkTargets() models.ProbeResults {
results := make(map[string][]models.ProbeResult)
probes := []struct {
protocol string
hosts []string
probeFn func(addr string, timeout time.Duration) error
statFn func(host string)
timeout time.Duration
}{
{
protocol: "dns",
hosts: GoldpingerConfig.DnsHosts,
probeFn: doDNSProbe,
statFn: CountDnsError,
timeout: GoldpingerConfig.DnsCheckTimeout,
},
{
protocol: "http",
hosts: GoldpingerConfig.HTTPTargets,
probeFn: doHTTPProbe,
statFn: CountHttpError,
timeout: GoldpingerConfig.HTTPCheckTimeout,
},
{
protocol: "tcp",
hosts: GoldpingerConfig.TCPTargets,
probeFn: doTCPProbe,
statFn: CountTcpError,
timeout: GoldpingerConfig.TCPCheckTimeout,
},
}

for _, probe := range probes {
for _, host := range probe.hosts {
if _, ok := results[host]; !ok {
results[host] = []models.ProbeResult{}
}

var dnsResult models.DNSResult
res := models.ProbeResult{Protocol: probe.protocol}
start := time.Now()
err := probe.probeFn(host, probe.timeout)
if err != nil {
res.Error = err.Error()
probe.statFn(host)
}

start := time.Now()
_, err := net.LookupIP(host)
if err != nil {
dnsResult.Error = err.Error()
CountDnsError(host)
res.ResponseTimeMs = time.Since(start).Milliseconds()
results[host] = append(results[host], res)
}
dnsResult.ResponseTimeMs = time.Since(start).Nanoseconds() / int64(time.Millisecond)
results[host] = dnsResult
}
return &results

return results
}

// CheckServicePodsResult results of the /check operation
Expand Down Expand Up @@ -195,7 +228,7 @@ func CheckAllPods(checkAllCtx context.Context, pods map[string]*GoldpingerPod) *
} else {
checkCtx, cancel := context.WithTimeout(
checkAllCtx,
time.Duration(GoldpingerConfig.CheckTimeoutMs)*time.Millisecond,
GoldpingerConfig.CheckTimeout,
)
defer cancel()

Expand Down Expand Up @@ -238,15 +271,15 @@ func CheckAllPods(checkAllCtx context.Context, pods map[string]*GoldpingerPod) *
PodIP: response.podIPv4,
})
if response.checkAllPodResult.Response != nil &&
response.checkAllPodResult.Response.DNSResults != nil {
if result.DNSResults == nil {
result.DNSResults = make(map[string]models.DNSResults)
response.checkAllPodResult.Response.ProbeResults != nil {
if result.ProbeResults == nil {
result.ProbeResults = make(map[string]models.ProbeResults)
}
for host := range response.checkAllPodResult.Response.DNSResults {
if result.DNSResults[host] == nil {
result.DNSResults[host] = make(map[string]models.DNSResult)
for host := range response.checkAllPodResult.Response.ProbeResults {
if result.ProbeResults[host] == nil {
result.ProbeResults[host] = make(map[string][]models.ProbeResult)
}
result.DNSResults[host][response.podName] = response.checkAllPodResult.Response.DNSResults[host]
result.ProbeResults[host][response.podName] = response.checkAllPodResult.Response.ProbeResults[host]
}
}
}
Expand Down
18 changes: 14 additions & 4 deletions pkg/goldpinger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package goldpinger

import (
"time"

"k8s.io/client-go/kubernetes"
)

Expand All @@ -35,12 +37,20 @@ var GoldpingerConfig = struct {
DisplayNodeName bool `long:"display-nodename" description:"Display nodename other than podname in UI (defaults is podname)." env:"DISPLAY_NODENAME"`
KubernetesClient *kubernetes.Clientset

DnsHosts []string `long:"host-to-resolve" description:"A host to attempt dns resolve on (space delimited)" env:"HOSTS_TO_RESOLVE" env-delim:" "`
DnsHosts []string `long:"host-to-resolve" description:"A host to attempt dns resolve on (space delimited)" env:"HOSTS_TO_RESOLVE" env-delim:" "`
TCPTargets []string `long:"tcp-targets" description:"A list of external targets(<host>:<port> or <ip>:<port>) to attempt a TCP check on (space delimited)" env:"TCP_TARGETS" env-delim:" "`
HTTPTargets []string `long:"http-targets" description:"A list of external targets(<http or https>://<url>) to attempt an HTTP{S} check on. A 200 HTTP code is considered successful.(space delimited)" env:"HTTP_TARGETS" env-delim:" "`

IPVersions []string `long:"ip-versions" description:"The IP versions to use (space delimited). Possible values are 4 and 6 (defaults to 4)." env:"IP_VERSIONS" env-delim:" "`

// Timeouts
PingTimeoutMs int64 `long:"ping-timeout-ms" description:"The timeout in milliseconds for a ping call to other goldpinger pods" env:"PING_TIMEOUT_MS" default:"300"`
CheckTimeoutMs int64 `long:"check-timeout-ms" description:"The timeout in milliseconds for a check call to other goldpinger pods" env:"CHECK_TIMEOUT_MS" default:"1000"`
CheckAllTimeoutMs int64 `long:"check-all-timeout-ms" description:"The timeout in milliseconds for a check-all call to other goldpinger pods" env:"CHECK_ALL_TIMEOUT_MS" default:"5000"`
PingTimeoutMs int64 `long:"ping-timeout-ms" description:"The timeout in milliseconds for a ping call to other goldpinger pods(deprecated)" env:"PING_TIMEOUT_MS" default:"300"`
CheckTimeoutMs int64 `long:"check-timeout-ms" description:"The timeout in milliseconds for a check call to other goldpinger pods(deprecated)" env:"CHECK_TIMEOUT_MS" default:"1000"`
CheckAllTimeoutMs int64 `long:"check-all-timeout-ms" description:"The timeout in milliseconds for a check-all call to other goldpinger pods(deprecated)" env:"CHECK_ALL_TIMEOUT_MS" default:"5000"`
PingTimeout time.Duration `long:"ping-timeout" description:"The timeout for a ping call to other goldpinger pods" env:"PING_TIMEOUT" default:"300ms"`
CheckTimeout time.Duration `long:"check-timeout" description:"The timeout for a check call to other goldpinger pods" env:"CHECK_TIMEOUT" default:"1000ms"`
CheckAllTimeout time.Duration `long:"check-all-timeout" description:"The timeout for a check-all call to other goldpinger pods" env:"CHECK_ALL_TIMEOUT" default:"5000ms"`
TCPCheckTimeout time.Duration `long:"tcp-targets-timeout" description:"The timeout for a tcp check on the provided tcp-targets" env:"TCP_TARGETS_TIMEOUT" default:"500ms"`
DnsCheckTimeout time.Duration `long:"dns-targets-timeout" description:"The timeout for a dns check on the provided dns-targets" env:"DNS_TARGETS_TIMEOUT" default:"500ms"`
HTTPCheckTimeout time.Duration `long:"http-targets-timeout" description:"The timeout for a http check on the provided http-targets" env:"HTTP_TARGETS_TIMEOUT" default:"500ms"`
}{}
3 changes: 1 addition & 2 deletions pkg/goldpinger/heatmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"net/http"
"sort"
"strconv"
"time"

"go.uber.org/zap"
"golang.org/x/image/font"
Expand Down Expand Up @@ -83,7 +82,7 @@ func HeatmapHandler(w http.ResponseWriter, r *http.Request) {

ctx, cancel := context.WithTimeout(
r.Context(),
time.Duration(GoldpingerConfig.CheckAllTimeoutMs)*time.Millisecond,
GoldpingerConfig.CheckAllTimeout,
)
defer cancel()

Expand Down
2 changes: 1 addition & 1 deletion pkg/goldpinger/pinger.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ type Pinger struct {
func NewPinger(pod *GoldpingerPod, resultsChan chan<- PingAllPodsResult) *Pinger {
p := Pinger{
pod: pod,
timeout: time.Duration(GoldpingerConfig.PingTimeoutMs) * time.Millisecond,
timeout: GoldpingerConfig.PingTimeout,
resultsChan: resultsChan,
stopChan: make(chan struct{}),

Expand Down
72 changes: 72 additions & 0 deletions pkg/goldpinger/probes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2018 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package goldpinger

import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"net/url"
"time"
)

// doDNSProbe resolves addr with a default net.Resolver and reports whether
// the lookup succeeded within timeout.
//
// It returns the resolver's own error when the lookup fails (including when
// the timeout expires), an error when the lookup succeeds but yields no
// addresses, and nil otherwise.
func doDNSProbe(addr string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	var resolver net.Resolver
	ips, err := resolver.LookupHost(ctx, addr)
	// Check the lookup error first so the real cause (timeout, unknown host,
	// ...) is reported instead of being masked by the generic "0 ips" message.
	if err != nil {
		return err
	}
	if len(ips) == 0 {
		return fmt.Errorf("%s was resolved to 0 ips", addr)
	}
	return nil
}

// doTCPProbe tries to open a TCP connection to addr, giving up after timeout.
// A connection that was established is closed again before returning.
// The result is the dial error, or nil when the connection succeeded.
func doTCPProbe(addr string, timeout time.Duration) error {
	c, dialErr := net.DialTimeout("tcp", addr, timeout)
	if c == nil {
		return dialErr
	}
	c.Close()
	return dialErr
}

// doHTTPProbe issues a GET request to addr and reports whether the target
// answered with HTTP 200 within timeout.
//
// addr must be a URL whose scheme is http or https. An error is returned when
// addr cannot be parsed, uses another scheme, the request fails (including on
// timeout) or the response status is not 200; nil is returned otherwise.
func doHTTPProbe(addr string, timeout time.Duration) error {
	u, err := url.Parse(addr)
	if err != nil {
		return err
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return fmt.Errorf("invalid url scheme: '%s' in address", u.Scheme)
	}

	client := http.Client{Timeout: timeout}
	if u.Scheme == "https" {
		// NOTE(review): certificate verification is disabled here, so a
		// successful probe shows reachability, not the target's identity.
		client.Transport = &http.Transport{
			TLSClientConfig: &tls.Config{
				InsecureSkipVerify: true,
			},
			// A fresh Transport is built on every probe; without this its
			// idle keep-alive connection would outlive the call and pile up.
			DisableKeepAlives: true,
		}
	}

	resp, err := client.Get(addr)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s returned non-200 resp: %d", addr, resp.StatusCode)
	}
	return nil
}
39 changes: 38 additions & 1 deletion pkg/goldpinger/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,26 @@ var (
"host",
},
)

// goldPingerTcpErrorsCounter is the counter vector behind the
// goldpinger_tcp_errors_total metric. It is partitioned by the
// "goldpinger_instance" and "host" labels.
goldPingerTcpErrorsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "goldpinger_tcp_errors_total",
Help: "Statistics of TCP probe errors per instance",
},
[]string{
"goldpinger_instance",
"host",
},
)
// goldPingerHttpErrorsCounter is the counter vector behind the
// goldpinger_http_errors_total metric. It is partitioned by the
// "goldpinger_instance" and "host" labels.
goldPingerHttpErrorsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "goldpinger_http_errors_total",
Help: "Statistics of HTTP probe errors per instance",
},
[]string{
"goldpinger_instance",
"host",
},
)
bootTime = time.Now()
)

Expand All @@ -115,6 +134,8 @@ func init() {
prometheus.MustRegister(goldpingerResponseTimeKubernetesHistogram)
prometheus.MustRegister(goldpingerErrorsCounter)
prometheus.MustRegister(goldpingerDnsErrorsCounter)
prometheus.MustRegister(goldPingerHttpErrorsCounter)
prometheus.MustRegister(goldPingerTcpErrorsCounter)
zap.L().Info("Metrics setup - see /metrics")
}

Expand Down Expand Up @@ -173,6 +194,22 @@ func CountDnsError(host string) {
).Inc()
}

// CountTcpError increments the TCP probe error counter for host, labelled
// with the hostname configured for this instance.
func CountTcpError(host string) {
	counter := goldPingerTcpErrorsCounter.WithLabelValues(GoldpingerConfig.Hostname, host)
	counter.Inc()
}

// CountHttpError increments the HTTP probe error counter for host, labelled
// with the hostname configured for this instance.
func CountHttpError(host string) {
	counter := goldPingerHttpErrorsCounter.WithLabelValues(GoldpingerConfig.Hostname, host)
	counter.Inc()
}

// returns a timer for easy observing of the durations of calls to kubernetes API
func GetLabeledKubernetesCallsTimer() *prometheus.Timer {
return prometheus.NewTimer(
Expand Down
Loading

0 comments on commit e837ac4

Please sign in to comment.