Skip to content

Commit

Permalink
add external probes
Browse files Browse the repository at this point in the history
Signed-off-by: kitfoman <thaddeusgetachew@gmail.com>
  • Loading branch information
getact authored and kitfoman committed Mar 4, 2022
1 parent 9496562 commit 0b7bf0f
Show file tree
Hide file tree
Showing 29 changed files with 589 additions and 186 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,27 @@ and `goldpinger` should show something like this:

![screenshot-DNS-resolution](./extras/dns-screenshot.png)

### TCP and HTTP checks to external targets

Instances can also be configured to do simple TCP or HTTP checks on external targets. This is useful for visualizing more nuanced connectivity flows.

```sh
--tcp-targets= A list of external targets (<host>:<port> or <ip>:<port>) to attempt a TCP check on (space delimited) [$TCP_TARGETS]
--http-targets= A list of external targets (<http or https>://<url>) to attempt an HTTP check on; a 200 HTTP code is considered successful (space delimited) [$HTTP_TARGETS]
--tcp-targets-timeout= The timeout for a TCP check on the provided tcp-targets (default: 500ms) [$TCP_TARGETS_TIMEOUT]
--dns-targets-timeout= The timeout for a DNS check on the hosts provided via host-to-resolve (default: 500ms) [$DNS_TARGETS_TIMEOUT]
--http-targets-timeout= The timeout for an HTTP check on the provided http-targets (default: 500ms) [$HTTP_TARGETS_TIMEOUT]
```

```yaml
- name: HTTP_TARGETS
value: http://bloomberg.com
- name: TCP_TARGETS
value: 10.34.5.141:5000 10.34.195.193:6442
```

The timeouts for the TCP, DNS and HTTP checks can be configured via `TCP_TARGETS_TIMEOUT`, `DNS_TARGETS_TIMEOUT` and `HTTP_TARGETS_TIMEOUT` respectively.

![screenshot-tcp-http-checks](./extras/tcp-checks-screenshot.png)

## Usage

Expand Down
Binary file added extras/tcp-checks-screenshot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.15
require (
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 // indirect
github.com/cespare/xxhash v1.1.0
github.com/go-openapi/errors v0.20.0
github.com/go-openapi/errors v0.20.2
github.com/go-openapi/loads v0.19.5
github.com/go-openapi/runtime v0.19.26
github.com/go-openapi/spec v0.19.8
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ github.com/go-openapi/errors v0.19.6/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpX
github.com/go-openapi/errors v0.19.8/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M=
github.com/go-openapi/errors v0.20.0 h1:Sxpo9PjEHDzhs3FbnGNonvDgWcMW2U7wGTcDDSFSceM=
github.com/go-openapi/errors v0.20.0/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M=
github.com/go-openapi/errors v0.20.2 h1:dxy7PGTqEh94zj2E3h1cUmQQWiM1+aeCROfAr02EmK8=
github.com/go-openapi/errors v0.20.2/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M=
github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0=
github.com/go-openapi/jsonpointer v0.17.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M=
github.com/go-openapi/jsonpointer v0.18.0/go.mod h1:cOnomiV+CVVwFLk0A/MExoFMjwdsUdVpsRhURCKh+3M=
Expand Down
81 changes: 57 additions & 24 deletions pkg/goldpinger/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,12 @@ func CheckNeighbours(ctx context.Context) *models.CheckResults {
// Mux to prevent concurrent map address
checkResultsMux.Lock()
defer checkResultsMux.Unlock()

final := models.CheckResults{}
final.PodResults = make(map[string]models.PodResult)
for podName, podResult := range checkResults.PodResults {
final.PodResults[podName] = podResult
}
if len(GoldpingerConfig.DnsHosts) > 0 {
final.DNSResults = *checkDNS()
}
final.ProbeResults = checkTargets()
return &final
}

Expand Down Expand Up @@ -127,22 +124,58 @@ func pickPodHostIP(podIP, hostIP string) string {
return podIP
}

func checkDNS() *models.DNSResults {
results := models.DNSResults{}
for _, host := range GoldpingerConfig.DnsHosts {
func checkTargets() models.ProbeResults {
results := make(map[string][]models.ProbeResult)
probes := []struct {
protocol string
hosts []string
probeFn func(addr string, timeout time.Duration) error
statFn func(host string)
timeout time.Duration
}{
{
protocol: "dns",
hosts: GoldpingerConfig.DnsHosts,
probeFn: doDNSProbe,
statFn: CountDnsError,
timeout: GoldpingerConfig.DnsCheckTimeout,
},
{
protocol: "http",
hosts: GoldpingerConfig.HTTPTargets,
probeFn: doHTTPProbe,
statFn: CountHttpError,
timeout: GoldpingerConfig.HTTPCheckTimeout,
},
{
protocol: "tcp",
hosts: GoldpingerConfig.TCPTargets,
probeFn: doTCPProbe,
statFn: CountTcpError,
timeout: GoldpingerConfig.TCPCheckTimeout,
},
}

for _, probe := range probes {
for _, host := range probe.hosts {
if _, ok := results[host]; !ok {
results[host] = []models.ProbeResult{}
}

var dnsResult models.DNSResult
res := models.ProbeResult{Protocol: probe.protocol}
start := time.Now()
err := probe.probeFn(host, probe.timeout)
if err != nil {
res.Error = err.Error()
probe.statFn(host)
}

start := time.Now()
_, err := net.LookupIP(host)
if err != nil {
dnsResult.Error = err.Error()
CountDnsError(host)
res.ResponseTimeMs = time.Since(start).Milliseconds()
results[host] = append(results[host], res)
}
dnsResult.ResponseTimeMs = time.Since(start).Nanoseconds() / int64(time.Millisecond)
results[host] = dnsResult
}
return &results

return results
}

// CheckServicePodsResult results of the /check operation
Expand Down Expand Up @@ -195,7 +228,7 @@ func CheckAllPods(checkAllCtx context.Context, pods map[string]*GoldpingerPod) *
} else {
checkCtx, cancel := context.WithTimeout(
checkAllCtx,
time.Duration(GoldpingerConfig.CheckTimeoutMs)*time.Millisecond,
GoldpingerConfig.CheckTimeout,
)
defer cancel()

Expand Down Expand Up @@ -238,15 +271,15 @@ func CheckAllPods(checkAllCtx context.Context, pods map[string]*GoldpingerPod) *
PodIP: response.podIPv4,
})
if response.checkAllPodResult.Response != nil &&
response.checkAllPodResult.Response.DNSResults != nil {
if result.DNSResults == nil {
result.DNSResults = make(map[string]models.DNSResults)
response.checkAllPodResult.Response.ProbeResults != nil {
if result.ProbeResults == nil {
result.ProbeResults = make(map[string]models.ProbeResults)
}
for host := range response.checkAllPodResult.Response.DNSResults {
if result.DNSResults[host] == nil {
result.DNSResults[host] = make(map[string]models.DNSResult)
for host := range response.checkAllPodResult.Response.ProbeResults {
if result.ProbeResults[host] == nil {
result.ProbeResults[host] = make(map[string][]models.ProbeResult)
}
result.DNSResults[host][response.podName] = response.checkAllPodResult.Response.DNSResults[host]
result.ProbeResults[host][response.podName] = response.checkAllPodResult.Response.ProbeResults[host]
}
}
}
Expand Down
15 changes: 11 additions & 4 deletions pkg/goldpinger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package goldpinger

import (
"time"

"k8s.io/client-go/kubernetes"
)

Expand All @@ -34,12 +36,17 @@ var GoldpingerConfig = struct {
Namespace *string `long:"namespace" description:"namespace to use to discover goldpinger pods in the cluster (empty for all). Defaults to discovering the namespace for the current pod" env:"NAMESPACE"`
KubernetesClient *kubernetes.Clientset

DnsHosts []string `long:"host-to-resolve" description:"A host to attempt dns resolve on (space delimited)" env:"HOSTS_TO_RESOLVE" env-delim:" "`
DnsHosts []string `long:"host-to-resolve" description:"A host to attempt dns resolve on (space delimited)" env:"HOSTS_TO_RESOLVE" env-delim:" "`
TCPTargets []string `long:"tcp-targets" description:"A list of external targets(<host>:<port> or <ip>:<port>) to attempt a TCP check on (space delimited)" env:"TCP_TARGETS" env-delim:" "`
HTTPTargets []string `long:"http-targets" description:"A list of external targets(<http or https>://<url>) to attempt an HTTP check on. A 200 HTTP code is considered successful.(space delimited)" env:"HTTP_TARGETS" env-delim:" "`

IPVersions []string `long:"ip-versions" description:"The IP versions to use (space delimited). Possible values are 4 and 6 (defaults to 4)." env:"IP_VERSIONS" env-delim:" "`

// Timeouts
PingTimeoutMs int64 `long:"ping-timeout-ms" description:"The timeout in milliseconds for a ping call to other goldpinger pods" env:"PING_TIMEOUT_MS" default:"300"`
CheckTimeoutMs int64 `long:"check-timeout-ms" description:"The timeout in milliseconds for a check call to other goldpinger pods" env:"CHECK_TIMEOUT_MS" default:"1000"`
CheckAllTimeoutMs int64 `long:"check-all-timeout-ms" description:"The timeout in milliseconds for a check-all call to other goldpinger pods" env:"CHECK_ALL_TIMEOUT_MS" default:"5000"`
PingTimeout time.Duration `long:"ping-timeout" description:"The timeout for a ping call to other goldpinger pods" env:"PING_TIMEOUT" default:"300ms"`
CheckTimeout time.Duration `long:"check-timeout" description:"The timeout for a check call to other goldpinger pods" env:"CHECK_TIMEOUT" default:"1000ms"`
CheckAllTimeout time.Duration `long:"check-all-timeout" description:"The timeout for a check-all call to other goldpinger pods" env:"CHECK_ALL_TIMEOUT" default:"5000ms"`
TCPCheckTimeout time.Duration `long:"tcp-targets-timeout" description:"The timeout for a tcp check on the provided tcp-targets" env:"TCP_TARGETS_TIMEOUT" default:"500ms"`
DnsCheckTimeout time.Duration `long:"dns-targets-timeout" description:"The timeout for a dns check on the provided dns-targets" env:"DNS_TARGETS_TIMEOUT" default:"500ms"`
HTTPCheckTimeout time.Duration `long:"http-targets-timeout" description:"The timeout for a http check on the provided http-targets" env:"HTTP_TARGETS_TIMEOUT" default:"500ms"`
}{}
3 changes: 1 addition & 2 deletions pkg/goldpinger/heatmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
"net/http"
"sort"
"strconv"
"time"

"go.uber.org/zap"
"golang.org/x/image/font"
Expand Down Expand Up @@ -83,7 +82,7 @@ func HeatmapHandler(w http.ResponseWriter, r *http.Request) {

ctx, cancel := context.WithTimeout(
r.Context(),
time.Duration(GoldpingerConfig.CheckAllTimeoutMs)*time.Millisecond,
GoldpingerConfig.CheckAllTimeout,
)
defer cancel()

Expand Down
3 changes: 2 additions & 1 deletion pkg/goldpinger/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ package goldpinger

import (
"context"
"go.uber.org/zap"
"io/ioutil"

"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8snet "k8s.io/utils/net"
Expand Down
2 changes: 1 addition & 1 deletion pkg/goldpinger/pinger.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ type Pinger struct {
func NewPinger(pod *GoldpingerPod, resultsChan chan<- PingAllPodsResult) *Pinger {
p := Pinger{
pod: pod,
timeout: time.Duration(GoldpingerConfig.PingTimeoutMs) * time.Millisecond,
timeout: GoldpingerConfig.PingTimeout,
resultsChan: resultsChan,
stopChan: make(chan struct{}),

Expand Down
72 changes: 72 additions & 0 deletions pkg/goldpinger/probes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2018 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package goldpinger

import (
"context"
"crypto/tls"
"fmt"
"net"
"net/http"
"net/url"
"time"
)

// doDNSProbe resolves addr with the default resolver, giving up once timeout
// has elapsed.
//
// It returns the resolver's own error when the lookup fails (for example a
// timeout or an unknown host), a descriptive error when the lookup succeeds
// but yields no addresses, and nil otherwise.
func doDNSProbe(addr string, timeout time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	var resolver net.Resolver
	ips, err := resolver.LookupHost(ctx, addr)
	if err != nil {
		// Check the error first: a failed lookup also returns no addresses,
		// and reporting it as "0 ips" would hide the real cause.
		return err
	}
	if len(ips) == 0 {
		return fmt.Errorf("%s was resolved to 0 ips", addr)
	}
	return nil
}

// doTCPProbe dials addr over TCP, waiting at most timeout, and returns the
// dial error (nil when the connection was established). The connection is
// only used to prove reachability and is closed straight away.
func doTCPProbe(addr string, timeout time.Duration) error {
	conn, dialErr := net.DialTimeout("tcp", addr, timeout)
	if conn == nil {
		return dialErr
	}
	// Nothing is sent or read; the close error is not reported to the caller.
	conn.Close()
	return dialErr
}

// doHTTPProbe issues a GET request to addr and reports whether it answered
// with HTTP 200 within timeout.
//
// addr must be an absolute URL with an "http" or "https" scheme. An error is
// returned when the URL cannot be parsed, the scheme is unsupported, the
// request fails (including on timeout), or the status code is not 200.
func doHTTPProbe(addr string, timeout time.Duration) error {
	u, err := url.Parse(addr)
	if err != nil {
		return err
	}
	if u.Scheme != "http" && u.Scheme != "https" {
		return fmt.Errorf("invalid url scheme: '%s' in address", u.Scheme)
	}

	client := http.Client{Timeout: timeout}
	if u.Scheme == "https" {
		client.Transport = &http.Transport{
			TLSClientConfig: &tls.Config{
				// NOTE(review): certificate verification is disabled, so the
				// probe presumably only tests reachability - confirm this is
				// intended before relying on it for TLS validation.
				InsecureSkipVerify: true,
			},
			// This transport is created per call and never reused. Without
			// this, a kept-alive connection could stay in its idle pool
			// after the probe returns and leak.
			DisableKeepAlives: true,
		}
	}

	resp, err := client.Get(addr)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		return fmt.Errorf("%s returned non-200 resp: %d", addr, resp.StatusCode)
	}
	return nil
}
39 changes: 38 additions & 1 deletion pkg/goldpinger/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,26 @@ var (
"host",
},
)

goldPingerTcpErrorsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "goldpinger_tcp_errors_total",
Help: "Statistics of TCP probe errors per instance",
},
[]string{
"goldpinger_instance",
"host",
},
)
goldPingerHttpErrorsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "goldpinger_http_errors_total",
Help: "Statistics of HTTP probe errors per instance",
},
[]string{
"goldpinger_instance",
"host",
},
)
bootTime = time.Now()
)

Expand All @@ -115,6 +134,8 @@ func init() {
prometheus.MustRegister(goldpingerResponseTimeKubernetesHistogram)
prometheus.MustRegister(goldpingerErrorsCounter)
prometheus.MustRegister(goldpingerDnsErrorsCounter)
prometheus.MustRegister(goldPingerHttpErrorsCounter)
prometheus.MustRegister(goldPingerTcpErrorsCounter)
zap.L().Info("Metrics setup - see /metrics")
}

Expand Down Expand Up @@ -173,6 +194,22 @@ func CountDnsError(host string) {
).Inc()
}

// CountTcpError increments the TCP probe error counter for the given host,
// labelled with this instance's configured hostname.
func CountTcpError(host string) {
	counter := goldPingerTcpErrorsCounter.WithLabelValues(GoldpingerConfig.Hostname, host)
	counter.Inc()
}

// CountHttpError increments the HTTP probe error counter for the given host,
// labelled with this instance's configured hostname.
func CountHttpError(host string) {
	counter := goldPingerHttpErrorsCounter.WithLabelValues(GoldpingerConfig.Hostname, host)
	counter.Inc()
}

// returns a timer for easy observing of the durations of calls to kubernetes API
func GetLabeledKubernetesCallsTimer() *prometheus.Timer {
return prometheus.NewTimer(
Expand Down
13 changes: 8 additions & 5 deletions pkg/goldpinger/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,17 @@ func updateCounters() {
}
}
CountHealthyUnhealthyNodes(counterHealthy, float64(len(checkResults.PodResults))-counterHealthy)
// check DNS, but don't block the access to checkResultsMux
// check external targets, don't block the access to checkResultsMux
nodesHealthy := int(counterHealthy) == len(checkResults.PodResults)
go func(healthySoFar bool) {
if healthySoFar {
for _, response := range *checkDNS() {
if response.Error != "" {
healthySoFar = false
break
probeResults := checkTargets()
for host := range probeResults {
for _, response := range probeResults[host] {
if response.Error != "" {
healthySoFar = false
break
}
}
}
}
Expand Down
Loading

0 comments on commit 0b7bf0f

Please sign in to comment.