diff --git a/documentation/release_notes_7.markdown b/documentation/release_notes_7.markdown index d28cab699d..b84aaad4a9 100644 --- a/documentation/release_notes_7.markdown +++ b/documentation/release_notes_7.markdown @@ -25,6 +25,10 @@ Released TBD. partially detached and block future garbage collection progress. Garbage collection will now finalize the partition detach operation and remove the table. ([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013)) +* Fixed an issue with report garbage collection where a partition would be + detached, but the table was never deleted. Garbage collection will now + identify and clean-up these tables. + ([GitHub #4013](https://github.com/puppetlabs/puppetdb/issues/4013)) ## PuppetDB 7.20.0 diff --git a/src/puppetlabs/puppetdb/scf/storage.clj b/src/puppetlabs/puppetdb/scf/storage.clj index 7f59f70b43..fca4d7cd64 100644 --- a/src/puppetlabs/puppetdb/scf/storage.clj +++ b/src/puppetlabs/puppetdb/scf/storage.clj @@ -1689,6 +1689,21 @@ (jdbc/do-commands (format "ALTER TABLE %s DETACH PARTITION %s FINALIZE" parent pending)) (str pending)))) +(defn find-stranded-partitions + "Identify tables that match the child format of a partitioned table (like reports_historical) + that are not present in the pg_inherits table. These partitions have been detached, but failed + to be deleted. + + Tables that are not partitioned will also not be in the pg_inherits table, so you MUST + write a child-format that does not match any non-partitioned tables. + + Returns a list of strings. Each string is a stranded partition that should be removed." + [child-format] + (->> ["SELECT tablename FROM pg_tables WHERE tablename ~ ? AND tablename NOT IN ( SELECT inhrelid::regclass::text FROM pg_catalog.pg_inherits)" + child-format] + jdbc/query-to-vec + (map (comp str :tablename)))) + (defn prune-daily-partitions "Either detaches or drops obsolete day-oriented partitions older than the date. Deletes or detaches only the oldest such candidate if @@ -1772,7 +1787,7 @@ "Drops the given set of tables. Will throw an SQLException termination if the operation takes much longer than PDB_GC_DAILY_PARTITION_DROP_LOCK_TIMEOUT_MS." [old-partition-tables update-lock-status status-key] - (let [drop #(doseq [table old-partition-tables] + (let [drop #(doseq [table (distinct old-partition-tables)] (try (update-lock-status status-key inc) (jdbc/do-commands @@ -1796,9 +1811,10 @@ ;; PG14+ (let [detached-tables (detach-daily-partitions "resource_events" date incremental? - update-lock-status :write-locking-resource-events)] + update-lock-status :write-locking-resource-events) + stranded-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$")] (jdbc/with-db-transaction [] - (drop-partition-tables! detached-tables + (drop-partition-tables! (concat detached-tables stranded-tables) update-lock-status :write-locking-resource-events))))) (defn cleanup-dropped-report-certnames @@ -1851,13 +1867,15 @@ ;; PG14+ ;; Detach partition concurrently must take place outside of a transaction. (let [detached-resource-event-tables - (detach-daily-partitions "resource_events" effective-resource-events-ttl - incremental? update-lock-status - :write-locking-resource-events) + (detach-daily-partitions "resource_events" effective-resource-events-ttl + incremental? update-lock-status + :write-locking-resource-events) + stranded-events-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$") detached-report-tables - (detach-daily-partitions "reports" report-ttl - incremental? update-lock-status - :write-locking-reports)] + (detach-daily-partitions "reports" report-ttl + incremental? update-lock-status + :write-locking-reports) + stranded-reports-tables (find-stranded-partitions "^reports_\\d\\d\\d\\d\\d\\d\\d\\dz$")] ;; Now we can delete the partitions with less intrusive locking. (jdbc/with-db-transaction [] ;; Nothing should acquire locks on the detached tables, but to be safe, acquire @@ -1867,9 +1885,9 @@ ;; force a resource-events GC. prior to partitioning, this would have happened ;; via a cascade when the report was deleted, but now we just drop whole tables ;; of resource events. - (drop-partition-tables! detached-resource-event-tables + (drop-partition-tables! (concat detached-resource-event-tables stranded-events-tables) update-lock-status :write-locking-resource-events) - (drop-partition-tables! detached-report-tables + (drop-partition-tables! (concat detached-report-tables stranded-reports-tables) update-lock-status :write-locking-reports) ;; since we cannot cascade back to the certnames table anymore, go clean up ;; the latest_report_id column after a GC. diff --git a/test/puppetlabs/puppetdb/cli/services_test.clj b/test/puppetlabs/puppetdb/cli/services_test.clj index 2a3d1245d9..2819d5e58a 100644 --- a/test/puppetlabs/puppetdb/cli/services_test.clj +++ b/test/puppetlabs/puppetdb/cli/services_test.clj @@ -584,7 +584,7 @@ (with-test-db (let [config (-> (create-temp-config) (assoc :database *db* :read-database *read-db*) - (assoc-in [:database :gc-interval] "0.01")) + (assoc-in [:database :gc-interval] "60")) store-report #(sync-command-post (svc-utils/pdb-cmd-url) example-certname "store report" @@ -610,7 +610,7 @@ :db-lock-status db-lock-status}) (is (empty? (jdbc/query ["SELECT * FROM reports"])))) - ;; This test is not applicable unless our Postgres version is new enough + ;; These tests are not applicable unless our Postgres version is new enough ;; to support the concurrent partition detach feature. (when (scf-store/detach-partitions-concurrently?) (testing "a partition stuck in the pending state is finalized and removed" @@ -652,4 +652,25 @@ (is (empty? (jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table]))) - (jdbc/do-commands "DELETE FROM reports"))))))))) + (jdbc/do-commands "DELETE FROM reports"))) + + (testing "a detached partition that was not removed is cleaned up by gc" + (let [old-ts (-> 2 time/days time/ago) + partition-table (format "reports_%s" + (part/date-suffix (part/to-zoned-date-time (time/to-timestamp old-ts))))] + (store-report (time/to-string old-ts)) + (store-report (to-string (now))) + + ;; Strand the partition before calling GC + (jdbc/do-commands-outside-txn + (format "ALTER TABLE reports DETACH PARTITION %s CONCURRENTLY" partition-table)) + + (svcs/sweep-reports! *db* {:incremental? false + :report-ttl (time/parse-period "1d") + :resource-events-ttl (time/parse-period "1d") + :db-lock-status db-lock-status}) + + (is (empty? + (jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table]))) + + (jdbc/do-commands "DELETE FROM reports")))))))))