diff --git a/documentation/release_notes_7.markdown b/documentation/release_notes_7.markdown index 0ab9657cbf..d28cab699d 100644 --- a/documentation/release_notes_7.markdown +++ b/documentation/release_notes_7.markdown @@ -15,6 +15,17 @@ canonical: "/puppetdb/latest/release_notes.html" # PuppetDB: Release notes +## PuppetDB 7.20.1 + +Released TBD. + +### Bug fixes + +* Fixed an issue with report garbage collection where a partition would become + partially detached and block future garbage collection progress. Garbage + collection will now finalize the partition detach operation and remove the + table. ([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013)) + ## PuppetDB 7.20.0 Released October 22 2024 diff --git a/src/puppetlabs/puppetdb/jdbc.clj b/src/puppetlabs/puppetdb/jdbc.clj index 35dd15bafd..bc222d0451 100644 --- a/src/puppetlabs/puppetdb/jdbc.clj +++ b/src/puppetlabs/puppetdb/jdbc.clj @@ -24,6 +24,7 @@ (or ({:admin-shutdown "57P01" :invalid-regular-expression "2201B" :program-limit-exceeded "54000" + :not-in-prerequisite-state "55000" :lock-not-available "55P03" :query-canceled "57014"} kw-name) diff --git a/src/puppetlabs/puppetdb/scf/storage.clj b/src/puppetlabs/puppetdb/scf/storage.clj index 8459b94e36..7f59f70b43 100644 --- a/src/puppetlabs/puppetdb/scf/storage.clj +++ b/src/puppetlabs/puppetdb/scf/storage.clj @@ -1673,6 +1673,22 @@ (let [{current-db-version :version} (sutils/db-metadata)] (not (neg? (compare current-db-version pg14-db))))) +(defn finalize-pending-detach + "Finalize a previously failed detach operation. A partitioned table can + only have one partition pending detachment at any time." + [parent] + (let [pending (->> ["SELECT inhrelid::regclass AS child + FROM pg_catalog.pg_inherits + WHERE inhparent = ?::regclass AND inhdetachpending = true" + parent] + jdbc/query-to-vec + first + :child)] + (when pending + (log/info (trs "Finalizing detach for partition {0}" pending)) + (jdbc/do-commands (format "ALTER TABLE %s DETACH PARTITION %s FINALIZE" parent pending)) + (str pending)))) + (defn prune-daily-partitions "Either detaches or drops obsolete day-oriented partitions older than the date. Deletes or detaches only the oldest such candidate if @@ -1698,14 +1714,25 @@ candidates (->> (partitioning/get-partition-names table-prefix) (filter expired?) sort) + detach (fn [parent child] + (jdbc/do-commands-outside-txn + (format "alter table %s detach partition %s concurrently" parent child))) drop-one (fn [table] (update-lock-status status-key inc) (try! (if just-detach? - (jdbc/do-commands-outside-txn - (format "alter table %s detach partition %s concurrently" table-prefix table)) + (try + (detach table-prefix table) + (catch SQLException ex + (if (= (jdbc/sql-state :not-in-prerequisite-state) (.getSQLState ex)) + (let [finalized-table (finalize-pending-detach table-prefix)] + (when-not (= finalized-table table) + ;; Retry, unless the finalized partition detach was + ;; for the same table + (detach table-prefix table))) + (throw ex)))) (jdbc/do-commands - (format "drop table if exists %s cascade" table))) + (format "drop table if exists %s cascade" table))) (finally (update-lock-status status-key dec)))) drop #(if incremental? diff --git a/test/puppetlabs/puppetdb/cli/services_test.clj b/test/puppetlabs/puppetdb/cli/services_test.clj index d64ecf8114..2a3d1245d9 100644 --- a/test/puppetlabs/puppetdb/cli/services_test.clj +++ b/test/puppetlabs/puppetdb/cli/services_test.clj @@ -5,7 +5,8 @@ [puppetlabs.puppetdb.command.constants :as cmd-consts] [puppetlabs.puppetdb.lint :refer [ignore-value]] [puppetlabs.puppetdb.scf.partitioning - :refer [get-temporal-partitions]] + :refer [get-temporal-partitions] + :as part] [puppetlabs.trapperkeeper.testutils.logging :refer [with-log-output logs-matching @@ -607,4 +608,48 @@ :report-ttl (time/parse-period "1d") :resource-events-ttl (time/parse-period "1d") :db-lock-status db-lock-status}) - (is (empty? (jdbc/query ["SELECT * FROM reports"]))))))))) + (is (empty? (jdbc/query ["SELECT * FROM reports"])))) + + ;; This test is not applicable unless our Postgres version is new enough + ;; to support the concurrent partition detach feature. + (when (scf-store/detach-partitions-concurrently?) + (testing "a partition stuck in the pending state is finalized and removed" + (let [old-ts (-> 2 time/days time/ago) + partition-table (format "reports_%s" + (part/date-suffix (part/to-zoned-date-time (time/to-timestamp old-ts)))) + lock-acquired (promise)] + (store-report (time/to-string old-ts)) + (store-report (to-string (now))) + + (future + ;; Create a query that will block the ACCESS EXCLUSIVE lock needed + ;; by the second transaction of the concurrent detach below + (jdbc/with-transacted-connection *read-db* + (jdbc/with-db-transaction [] + (jdbc/query [(format "select * from %s" partition-table)]) + (deliver lock-acquired partition-table) + (Thread/sleep 1000)))) + + ;; Wait until we are sure that the detach partition operation will be blocked + @lock-acquired + + (try + (jdbc/do-commands-outside-txn + "SET statement_timeout = 100" + (format "ALTER TABLE reports DETACH PARTITION %s CONCURRENTLY" partition-table)) + (catch java.sql.SQLException _ex)) + + (jdbc/do-commands-outside-txn "SET statement_timeout = 0") + + (is (= [{:inhdetachpending true}] + (jdbc/query ["select inhdetachpending from pg_catalog.pg_inherits where inhparent = 'reports'::regclass and inhrelid = ?::regclass" partition-table]))) + + (svcs/sweep-reports! *db* {:incremental? false + :report-ttl (time/parse-period "1d") + :resource-events-ttl (time/parse-period "1d") + :db-lock-status db-lock-status}) + + (is (empty? + (jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table]))) + + (jdbc/do-commands "DELETE FROM reports")))))))))