From 511de7108a24ba5b3836352eb56b1775b47dc875 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 16:03:42 +0000 Subject: [PATCH 01/10] Wait for cluster/leave to succeed before exiting Depending on whether the leader is shutting down itself, it may take some time for it to handle the leave request. We should spin until we get confirmation from the leader. (Only then it's safe to remove the now obsolete database file.) --- templates/ovndbcluster/bin/cleanup.sh | 13 +++++++++++++ templates/ovndbcluster/bin/setup.sh | 1 - 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/templates/ovndbcluster/bin/cleanup.sh b/templates/ovndbcluster/bin/cleanup.sh index dd7ec306..554d7162 100755 --- a/templates/ovndbcluster/bin/cleanup.sh +++ b/templates/ovndbcluster/bin/cleanup.sh @@ -22,4 +22,17 @@ if [[ "${DB_TYPE}" == "sb" ]]; then fi if [[ "$(hostname)" != "{{ .SERVICE_NAME }}-0" ]]; then ovs-appctl -t /tmp/ovn${DB_TYPE}_db.ctl cluster/leave ${DB_NAME} + + # wait for when the leader confirms we left the cluster + while true; do + # TODO: is there a better way to detect the cluster left state?.. + STATUS=$(ovs-appctl -t /tmp/ovn${DB_TYPE}_db.ctl cluster/status ${DB_NAME} | grep Status: | awk -e '{print $2}') + if [ -z "$STATUS" -o "x$STATUS" = "xleft cluster" ]; then + break + fi + sleep 1 + done + + # now that we left, the database file is no longer valid + rm -f /etc/ovn/ovn${DB_TYPE}_db.db fi diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index e6fb52d7..acc53220 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -34,7 +34,6 @@ else fi if [[ "$(hostname)" != "{{ .SERVICE_NAME }}-0" ]]; then - rm -f /etc/ovn/ovn${DB_TYPE}_db.db #ovsdb-tool join-cluster /etc/ovn/ovn${DB_TYPE}_db.db ${DB_NAME} tcp:$(hostname).{{ .SERVICE_NAME }}.${NAMESPACE}.svc.cluster.local:${RAFT_PORT} tcp:{{ .SERVICE_NAME }}-0.{{ .SERVICE_NAME }}.${NAMESPACE}.svc.cluster.local:${RAFT_PORT} OPTS="--db-${DB_TYPE}-cluster-remote-addr={{ .SERVICE_NAME }}-0.{{ .SERVICE_NAME }}.${NAMESPACE}.svc.cluster.local --db-${DB_TYPE}-cluster-remote-port=${RAFT_PORT}" fi From 52820dde4126f58aeffc552e911ed4700456f908 Mon Sep 17 00:00:00 2001 From: Jakub Libosvar Date: Tue, 12 Mar 2024 13:33:32 -0400 Subject: [PATCH 02/10] Set Parallel pod management policy for OVS DBs There is no need to wait for another pods to be spawned first. The OVN DB pods can and should start all at a time. Signed-off-by: Jakub Libosvar --- pkg/ovndbcluster/statefulset.go | 5 +++-- tests/functional/ovndbcluster_controller_test.go | 11 +++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pkg/ovndbcluster/statefulset.go b/pkg/ovndbcluster/statefulset.go index 3f42311a..2cfbbdf1 100644 --- a/pkg/ovndbcluster/statefulset.go +++ b/pkg/ovndbcluster/statefulset.go @@ -133,8 +133,9 @@ func StatefulSet( Selector: &metav1.LabelSelector{ MatchLabels: labels, }, - ServiceName: serviceName, - Replicas: instance.Spec.Replicas, + ServiceName: serviceName, + PodManagementPolicy: appsv1.ParallelPodManagement, + Replicas: instance.Spec.Replicas, Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Annotations: annotations, diff --git a/tests/functional/ovndbcluster_controller_test.go b/tests/functional/ovndbcluster_controller_test.go index 1bcb6b19..053103cf 100644 --- a/tests/functional/ovndbcluster_controller_test.go +++ b/tests/functional/ovndbcluster_controller_test.go @@ -27,6 +27,7 @@ import ( condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" . "github.com/openstack-k8s-operators/lib-common/modules/common/test/helpers" ovnv1 "github.com/openstack-k8s-operators/ovn-operator/api/v1beta1" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" ) @@ -159,6 +160,16 @@ var _ = Describe("OVNDBCluster controller", func() { Expect(OVNDBCluster.Spec.DBType).Should(Equal(ovnv1.NBDBType)) }) + It("should have the StatefulSet with podManagementPolicy set to Parallel", func() { + statefulSetName := types.NamespacedName{ + Namespace: namespace, + Name: "ovsdbserver-nb", + } + ss := th.GetStatefulSet(statefulSetName) + + Expect(ss.Spec.PodManagementPolicy).Should(Equal(appsv1.ParallelPodManagement)) + }) + It("should have the Status fields initialized", func() { OVNDBCluster := GetOVNDBCluster(OVNDBClusterName) Expect(OVNDBCluster.Status.Hash).To(BeEmpty()) From 2a09a92765dbc9d4532e3c4aa7f0159f44c0f5bc Mon Sep 17 00:00:00 2001 From: Jakub Libosvar Date: Tue, 12 Mar 2024 13:33:32 -0400 Subject: [PATCH 03/10] settings.sh: wait for db up before executing commands Signed-off-by: Jakub Libosvar --- templates/ovndbcluster/bin/settings.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/templates/ovndbcluster/bin/settings.sh b/templates/ovndbcluster/bin/settings.sh index d48f0d4d..6dfc77cd 100755 --- a/templates/ovndbcluster/bin/settings.sh +++ b/templates/ovndbcluster/bin/settings.sh @@ -25,10 +25,10 @@ DB_SCHEME="ptcp" exec 1>/proc/1/fd/1 2>&1 if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then - while [ ! -S /tmp/ovn${DB_TYPE}_db.ctl ]; do - echo DB Server Not ready, waiting - sleep 1 - done + # The command will wait until the daemon is connected and the DB is available + # All following ctl invocation will use the local DB replica in the daemon + export OVN_${DB_TYPE^^}_DAEMON=$(ovn-${DB_TYPE}ctl --detach) + daemon_var=OVN_${DB_TYPE^^}_DAEMON PODNAME=$(hostname -f | cut -d. -f1,2) PODIPV6=$(grep "${PODNAME}" /etc/hosts | grep ':' | cut -d$'\t' -f1) @@ -48,4 +48,7 @@ if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then ovn-${DB_TYPE}ctl --no-leader-only --inactivity-probe={{ .OVN_INACTIVITY_PROBE }} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} done ovn-${DB_TYPE}ctl --no-leader-only list connection + + # The daemon is no longer needed, kill it + kill $(echo ${!daemon_var} | sed "s/.*ovn-${DB_TYPE}ctl\.\([0-9]*\).*/\1/") fi From ae27ff2cca559a3b4668af1c0f5a529114878952 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 16:34:53 +0000 Subject: [PATCH 04/10] ovndbcluster: get rid of postStart This patch moves the code from settings.sh into setup.sh. The settings will still happen, but they will not block transition of the pod into Ready state, which is important to do even if raft cluster is not re-built, yet. This is in line with k8s guidelines about not making liveness / readiness checks depend on other pod states (in this case - on the connectivity to other raft members.) --- pkg/ovndbcluster/statefulset.go | 9 ---- templates/ovndbcluster/bin/settings.sh | 54 ------------------- templates/ovndbcluster/bin/setup.sh | 29 +++++++++- .../ovndbcluster_controller_test.go | 2 +- 4 files changed, 29 insertions(+), 65 deletions(-) delete mode 100755 templates/ovndbcluster/bin/settings.sh diff --git a/pkg/ovndbcluster/statefulset.go b/pkg/ovndbcluster/statefulset.go index 2cfbbdf1..ad7795ac 100644 --- a/pkg/ovndbcluster/statefulset.go +++ b/pkg/ovndbcluster/statefulset.go @@ -56,7 +56,6 @@ func StatefulSet( } var preStopCmd []string - var postStartCmd []string cmd := []string{"/usr/bin/dumb-init"} args := []string{"--single-child", "--", "/bin/bash", "-c", ServiceCommand} // @@ -69,19 +68,11 @@ func StatefulSet( } readinessProbe.Exec = livenessProbe.Exec - postStartCmd = []string{ - "/usr/local/bin/container-scripts/settings.sh", - } preStopCmd = []string{ "/usr/local/bin/container-scripts/cleanup.sh", } lifecycle := &corev1.Lifecycle{ - PostStart: &corev1.LifecycleHandler{ - Exec: &corev1.ExecAction{ - Command: postStartCmd, - }, - }, PreStop: &corev1.LifecycleHandler{ Exec: &corev1.ExecAction{ Command: preStopCmd, diff --git a/templates/ovndbcluster/bin/settings.sh b/templates/ovndbcluster/bin/settings.sh deleted file mode 100755 index 6dfc77cd..00000000 --- a/templates/ovndbcluster/bin/settings.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright 2022 Red Hat Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. -set -ex -DB_TYPE="{{ .DB_TYPE }}" -DB_PORT="{{ .DB_PORT }}" -{{- if .TLS }} -DB_SCHEME="pssl" -{{- else }} -DB_SCHEME="ptcp" -{{- end }} - -exec 1>/proc/1/fd/1 2>&1 - -if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then - # The command will wait until the daemon is connected and the DB is available - # All following ctl invocation will use the local DB replica in the daemon - export OVN_${DB_TYPE^^}_DAEMON=$(ovn-${DB_TYPE}ctl --detach) - daemon_var=OVN_${DB_TYPE^^}_DAEMON - - PODNAME=$(hostname -f | cut -d. -f1,2) - PODIPV6=$(grep "${PODNAME}" /etc/hosts | grep ':' | cut -d$'\t' -f1) - - if [[ "" = "${PODIPV6}" ]]; then - DB_ADDR="0.0.0.0" - else - DB_ADDR="[::]" - fi - -{{- if .TLS }} - ovn-${DB_TYPE}ctl --no-leader-only set-ssl {{.OVNDB_KEY_PATH}} {{.OVNDB_CERT_PATH}} {{.OVNDB_CACERT_PATH}} - ovn-${DB_TYPE}ctl --no-leader-only set-connection ${DB_SCHEME}:${DB_PORT}:0.0.0.0 -{{- end }} - - while [ "$(ovn-${DB_TYPE}ctl --no-leader-only get connection . inactivity_probe)" != "{{ .OVN_INACTIVITY_PROBE }}" ]; do - ovn-${DB_TYPE}ctl --no-leader-only --inactivity-probe={{ .OVN_INACTIVITY_PROBE }} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} - done - ovn-${DB_TYPE}ctl --no-leader-only list connection - - # The daemon is no longer needed, kill it - kill $(echo ${!daemon_var} | sed "s/.*ovn-${DB_TYPE}ctl\.\([0-9]*\).*/\1/") -fi diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index acc53220..7ac2d0c1 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -16,6 +16,11 @@ set -ex DB_TYPE="{{ .DB_TYPE }}" DB_PORT="{{ .DB_PORT }}" +{{- if .TLS }} +DB_SCHEME="pssl" +{{- else }} +DB_SCHEME="ptcp" +{{- end }} RAFT_PORT="{{ .RAFT_PORT }}" NAMESPACE="{{ .NAMESPACE }}" OPTS="" @@ -72,4 +77,26 @@ set "$@" --ovn-${DB_TYPE}-log=-vconsole:{{ .OVN_LOG_LEVEL }} set "$@" --ovn-${DB_TYPE}-logfile=/dev/null # don't log to file (we already log to console) -$@ ${OPTS} run_${DB_TYPE}_ovsdb -- -vfile:off +$@ ${OPTS} run_${DB_TYPE}_ovsdb -- -vfile:off & + +if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then + # The command will wait until the daemon is connected and the DB is available + # All following ctl invocation will use the local DB replica in the daemon + export OVN_${DB_TYPE^^}_DAEMON=$(ovn-${DB_TYPE}ctl --detach) + daemon_var=OVN_${DB_TYPE^^}_DAEMON + +{{- if .TLS }} + ovn-${DB_TYPE}ctl --no-leader-only set-ssl {{.OVNDB_KEY_PATH}} {{.OVNDB_CERT_PATH}} {{.OVNDB_CACERT_PATH}} + ovn-${DB_TYPE}ctl --no-leader-only set-connection ${DB_SCHEME}:${DB_PORT}:0.0.0.0 +{{- end }} + + while [ "$(ovn-${DB_TYPE}ctl --no-leader-only get connection . inactivity_probe)" != "{{ .OVN_INACTIVITY_PROBE }}" ]; do + ovn-${DB_TYPE}ctl --no-leader-only --inactivity-probe={{ .OVN_INACTIVITY_PROBE }} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} + done + ovn-${DB_TYPE}ctl --no-leader-only list connection + + # The daemon is no longer needed, kill it + kill $(echo ${!daemon_var} | sed "s/.*ovn-${DB_TYPE}ctl\.\([0-9]*\).*/\1/") +fi + +wait diff --git a/tests/functional/ovndbcluster_controller_test.go b/tests/functional/ovndbcluster_controller_test.go index 053103cf..869e2934 100644 --- a/tests/functional/ovndbcluster_controller_test.go +++ b/tests/functional/ovndbcluster_controller_test.go @@ -501,7 +501,7 @@ var _ = Describe("OVNDBCluster controller", func() { return *th.GetConfigMap(scriptsCM) }, timeout, interval).ShouldNot(BeNil()) - Expect(th.GetConfigMap(scriptsCM).Data["settings.sh"]).Should( + Expect(th.GetConfigMap(scriptsCM).Data["setup.sh"]).Should( ContainSubstring("DB_SCHEME=\"pssl\"")) Expect(th.GetConfigMap(scriptsCM).Data["setup.sh"]).Should(And( ContainSubstring("-db-ssl-key="), From 6ceb0dce4728c87eea203bcf2fd11b19d2ef8204 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 17:14:11 +0000 Subject: [PATCH 05/10] Simplify ovn-*ctl daemon killing logic Start with --pidfile and then use the file to kill. --- templates/ovndbcluster/bin/setup.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index 7ac2d0c1..45b5ad3b 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -82,8 +82,7 @@ $@ ${OPTS} run_${DB_TYPE}_ovsdb -- -vfile:off & if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then # The command will wait until the daemon is connected and the DB is available # All following ctl invocation will use the local DB replica in the daemon - export OVN_${DB_TYPE^^}_DAEMON=$(ovn-${DB_TYPE}ctl --detach) - daemon_var=OVN_${DB_TYPE^^}_DAEMON + export OVN_${DB_TYPE^^}_DAEMON=$(ovn-${DB_TYPE}ctl --pidfile --detach) {{- if .TLS }} ovn-${DB_TYPE}ctl --no-leader-only set-ssl {{.OVNDB_KEY_PATH}} {{.OVNDB_CERT_PATH}} {{.OVNDB_CACERT_PATH}} @@ -96,7 +95,8 @@ if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then ovn-${DB_TYPE}ctl --no-leader-only list connection # The daemon is no longer needed, kill it - kill $(echo ${!daemon_var} | sed "s/.*ovn-${DB_TYPE}ctl\.\([0-9]*\).*/\1/") + kill $(cat $OVN_RUNDIR/ovn-${DB_TYPE}ctl.pid) + unset OVN_${DB_TYPE^^}_DAEMON fi wait From b361fa99b82587cd50f7b87a836a300b8231cf3b Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 17:17:50 +0000 Subject: [PATCH 06/10] Always use --no-leader-only when using ovn-*ctl In case pod0 is *not* a leader anymore, we should still be able to proceed with the setup. --- templates/ovndbcluster/bin/setup.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index 45b5ad3b..33d4dfe9 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -79,20 +79,21 @@ set "$@" --ovn-${DB_TYPE}-logfile=/dev/null # don't log to file (we already log to console) $@ ${OPTS} run_${DB_TYPE}_ovsdb -- -vfile:off & +CTLCMD="ovn-${DB_TYPE}ctl --no-leader-only" if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then # The command will wait until the daemon is connected and the DB is available # All following ctl invocation will use the local DB replica in the daemon - export OVN_${DB_TYPE^^}_DAEMON=$(ovn-${DB_TYPE}ctl --pidfile --detach) + export OVN_${DB_TYPE^^}_DAEMON=$(${CTLCMD} --pidfile --detach) {{- if .TLS }} - ovn-${DB_TYPE}ctl --no-leader-only set-ssl {{.OVNDB_KEY_PATH}} {{.OVNDB_CERT_PATH}} {{.OVNDB_CACERT_PATH}} - ovn-${DB_TYPE}ctl --no-leader-only set-connection ${DB_SCHEME}:${DB_PORT}:0.0.0.0 + ${CTLCMD} set-ssl {{.OVNDB_KEY_PATH}} {{.OVNDB_CERT_PATH}} {{.OVNDB_CACERT_PATH}} + ${CTLCMD} set-connection ${DB_SCHEME}:${DB_PORT}:0.0.0.0 {{- end }} - while [ "$(ovn-${DB_TYPE}ctl --no-leader-only get connection . inactivity_probe)" != "{{ .OVN_INACTIVITY_PROBE }}" ]; do - ovn-${DB_TYPE}ctl --no-leader-only --inactivity-probe={{ .OVN_INACTIVITY_PROBE }} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} + while [ "$(${CTLCMD} get connection . inactivity_probe)" != "{{ .OVN_INACTIVITY_PROBE }}" ]; do + ${CTLCMD} --inactivity-probe={{ .OVN_INACTIVITY_PROBE }} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} done - ovn-${DB_TYPE}ctl --no-leader-only list connection + ${CTLCMD} list connection # The daemon is no longer needed, kill it kill $(cat $OVN_RUNDIR/ovn-${DB_TYPE}ctl.pid) From df93d5dc1b216e51afca835a3ea417d9b2ed8f6d Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 17:28:11 +0000 Subject: [PATCH 07/10] Add more comments to setup.sh Explain why we set inactivity-probe with -ctl command. --- templates/ovndbcluster/bin/setup.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index 33d4dfe9..455e50c7 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -79,7 +79,9 @@ set "$@" --ovn-${DB_TYPE}-logfile=/dev/null # don't log to file (we already log to console) $@ ${OPTS} run_${DB_TYPE}_ovsdb -- -vfile:off & +# Once the database is running, we will attempt to configure db options CTLCMD="ovn-${DB_TYPE}ctl --no-leader-only" + if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then # The command will wait until the daemon is connected and the DB is available # All following ctl invocation will use the local DB replica in the daemon @@ -90,6 +92,19 @@ if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then ${CTLCMD} set-connection ${DB_SCHEME}:${DB_PORT}:0.0.0.0 {{- end }} + # OVN does not support setting inactivity-probe through --remote cli arg so + # we have to set it after database is up. + # + # In theory, ovsdb.local-config(5) could be used to configure inactivity + # probe using a local ovsdb-server. But the future of this database is + # unclear, and it was largely abandoned by the community in mid-flight, so + # no tools exist to configure connections using this database. It may even + # be that this scheme will be abandoned in the future, because its features + # are covered by ovs text config file support added in latest ovs releases. + # + # TODO: Consider migrating inactivity probe setting to config files when + # we update to ovs 3.3. See --config-file in ovsdb-server(1) for more + # details. while [ "$(${CTLCMD} get connection . inactivity_probe)" != "{{ .OVN_INACTIVITY_PROBE }}" ]; do ${CTLCMD} --inactivity-probe={{ .OVN_INACTIVITY_PROBE }} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} done From c4c2461d3e5d2411b764c29d2351827fd1219865 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 19:36:05 +0000 Subject: [PATCH 08/10] scripts: explain treating -0 raft pod differently from the rest --- templates/ovndbcluster/bin/cleanup.sh | 8 ++++++++ templates/ovndbcluster/bin/setup.sh | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/templates/ovndbcluster/bin/cleanup.sh b/templates/ovndbcluster/bin/cleanup.sh index 554d7162..bd3588eb 100755 --- a/templates/ovndbcluster/bin/cleanup.sh +++ b/templates/ovndbcluster/bin/cleanup.sh @@ -20,6 +20,9 @@ DB_TYPE="{{ .DB_TYPE }}" if [[ "${DB_TYPE}" == "sb" ]]; then DB_NAME="OVN_Southbound" fi + +# There is nothing special about -0 pod, except that it's always guaranteed to +# exist, assuming any replicas are ordered. if [[ "$(hostname)" != "{{ .SERVICE_NAME }}-0" ]]; then ovs-appctl -t /tmp/ovn${DB_TYPE}_db.ctl cluster/leave ${DB_NAME} @@ -32,7 +35,12 @@ if [[ "$(hostname)" != "{{ .SERVICE_NAME }}-0" ]]; then fi sleep 1 done +fi +# If replicas are 0 and *all* pods are removed, we still want to retain the +# database with its cid/sid for when the cluster is scaled back to > 0, so +# leaving the database file intact for -0 pod. +if [[ "$(hostname)" != "{{ .SERVICE_NAME }}-0" ]]; then # now that we left, the database file is no longer valid rm -f /etc/ovn/ovn${DB_TYPE}_db.db fi diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index 455e50c7..e7c73f8e 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -38,6 +38,10 @@ else DB_ADDR="[::]" fi +# The --cluster-remote-addr / --cluster-local-addr options will have effect +# only on bootstrap, when we assume the leadership role for the first pod. +# Later, cli arguments are still passed, but raft membership hints are already +# stored in the databases, and hence the arguments are of no effect. if [[ "$(hostname)" != "{{ .SERVICE_NAME }}-0" ]]; then #ovsdb-tool join-cluster /etc/ovn/ovn${DB_TYPE}_db.db ${DB_NAME} tcp:$(hostname).{{ .SERVICE_NAME }}.${NAMESPACE}.svc.cluster.local:${RAFT_PORT} tcp:{{ .SERVICE_NAME }}-0.{{ .SERVICE_NAME }}.${NAMESPACE}.svc.cluster.local:${RAFT_PORT} OPTS="--db-${DB_TYPE}-cluster-remote-addr={{ .SERVICE_NAME }}-0.{{ .SERVICE_NAME }}.${NAMESPACE}.svc.cluster.local --db-${DB_TYPE}-cluster-remote-port=${RAFT_PORT}" @@ -82,6 +86,9 @@ $@ ${OPTS} run_${DB_TYPE}_ovsdb -- -vfile:off & # Once the database is running, we will attempt to configure db options CTLCMD="ovn-${DB_TYPE}ctl --no-leader-only" +# Nothing special about the first pod, we just know that it always exists with +# replicas > 0 and use it for configuration. In theory, this could be executed +# in any other pod. if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then # The command will wait until the daemon is connected and the DB is available # All following ctl invocation will use the local DB replica in the daemon From d770ed5d14d9abfc2df3ff5c5d0a36f648f4b3e3 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Wed, 20 Mar 2024 19:48:33 +0000 Subject: [PATCH 09/10] Increase termination grace period for raft members It's important to give them a good chance to communicate to the leader that they are leaving. If the leader is offline for a bit (pod deleted), the message may not have enough time to propagate and commit into the leader database (the default is 30 seconds), which may leave the cluster in divergent state. --- pkg/ovndbcluster/statefulset.go | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pkg/ovndbcluster/statefulset.go b/pkg/ovndbcluster/statefulset.go index ad7795ac..356b3389 100644 --- a/pkg/ovndbcluster/statefulset.go +++ b/pkg/ovndbcluster/statefulset.go @@ -115,6 +115,19 @@ func StatefulSet( volumeMounts = append(volumeMounts, svc.CreateVolumeMounts(serviceName)...) } + // NOTE(ihar) ovndb pods leave the raft cluster on delete; it's important + // that they are not interrupted and have a good chance to propagate the + // leave message to the leader. In general case, this should happen near + // instantly. But if the leader pod is itself down / restarting, it may take + // it some time to recover and start processing messages from other members. + // The default value of 30 seconds is sometimes not enough. In local testing, + // 60 seconds seems enough, but we'll take a significantly more conservative + // approach here and set it to 5 minutes. + // + // If the leader is not back even after 5 minutes, we'll give up + // nevertheless, and manual cluster recovery will be needed. + terminationGracePeriodSeconds := int64(300) + statefulset := &appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: serviceName, @@ -133,7 +146,8 @@ func StatefulSet( Labels: labels, }, Spec: corev1.PodSpec{ - ServiceAccountName: instance.RbacResourceName(), + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + ServiceAccountName: instance.RbacResourceName(), Containers: []corev1.Container{ { Name: serviceName, From b74ef56993fb57ef2d403a1824ac477002c2f6fc Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Fri, 22 Mar 2024 15:32:47 +0000 Subject: [PATCH 10/10] ovndb: use DB_ADDR instead of 0.0.0.0 for ssl connection This is to support ipv6 pods. --- templates/ovndbcluster/bin/setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/ovndbcluster/bin/setup.sh b/templates/ovndbcluster/bin/setup.sh index e7c73f8e..89d0b61c 100755 --- a/templates/ovndbcluster/bin/setup.sh +++ b/templates/ovndbcluster/bin/setup.sh @@ -96,7 +96,7 @@ if [[ "$(hostname)" == "{{ .SERVICE_NAME }}-0" ]]; then {{- if .TLS }} ${CTLCMD} set-ssl {{.OVNDB_KEY_PATH}} {{.OVNDB_CERT_PATH}} {{.OVNDB_CACERT_PATH}} - ${CTLCMD} set-connection ${DB_SCHEME}:${DB_PORT}:0.0.0.0 + ${CTLCMD} set-connection ${DB_SCHEME}:${DB_PORT}:${DB_ADDR} {{- end }} # OVN does not support setting inactivity-probe through --remote cli arg so