Skip to content

Commit

Permalink
Clean stranded partitions
Browse files Browse the repository at this point in the history
  • Loading branch information
austb committed Oct 16, 2024
1 parent 46a9912 commit 6780c59
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 14 deletions.
40 changes: 29 additions & 11 deletions src/puppetlabs/puppetdb/scf/storage.clj
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,21 @@
(jdbc/do-commands (format "ALTER TABLE %s DETACH PARTITION %s FINALIZE" parent pending))
(str pending))))

(defn find-stranded-partitions
"Identify tables that match the child format of a partitioned table (like reports_historical)
that are not present in the pg_inherits table. These partitions have been detached, but failed
to be deleted.
Tables that are not partitioned will also not be in the pg_inherits table, so you MUST
write a child-format that does not match any non-partitioned tables.
Returns a list of strings. Each string is a stranded partition that should be removed."
[child-format]
(->> ["SELECT tablename FROM pg_tables WHERE tablename ~ ? AND tablename NOT IN ( SELECT inhrelid::regclass::text FROM pg_catalog.pg_inherits)"
child-format]
jdbc/query-to-vec
(map (comp str :tablename))))

(defn prune-daily-partitions
"Either detaches or drops obsolete day-oriented partitions
older than the date. Deletes or detaches only the oldest such candidate if
Expand Down Expand Up @@ -1725,7 +1740,7 @@
"Drops the given set of tables. Will throw an SQLException termination if the
operation takes much longer than PDB_GC_DAILY_PARTITION_DROP_LOCK_TIMEOUT_MS."
[old-partition-tables update-lock-status status-key]
(let [drop #(doseq [table old-partition-tables]
(let [drop #(doseq [table (distinct old-partition-tables)]
(try
(update-lock-status status-key inc)
(jdbc/do-commands
Expand All @@ -1749,9 +1764,10 @@
;; PG14+
(let [detached-tables
(detach-daily-partitions "resource_events_historical" date incremental?
update-lock-status :write-locking-resource-events)]
update-lock-status :write-locking-resource-events)
stranded-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$")]
(jdbc/with-db-transaction []
(drop-partition-tables! detached-tables
(drop-partition-tables! (concat detached-tables stranded-tables)
update-lock-status :write-locking-resource-events)))))

(defn delete-reports-older-than-in-pg-11!
Expand Down Expand Up @@ -1792,13 +1808,15 @@
;; PG14+
;; Detach partition concurrently must take place outside of a transaction.
(let [detached-resource-event-tables
(detach-daily-partitions "resource_events_historical" effective-resource-events-ttl
incremental? update-lock-status
:write-locking-resource-events)
(detach-daily-partitions "resource_events_historical" effective-resource-events-ttl
incremental? update-lock-status
:write-locking-resource-events)
stranded-events-tables (find-stranded-partitions "^resource_events_\\d\\d\\d\\d\\d\\d\\d\\dz$")
detached-report-tables
(detach-daily-partitions "reports_historical" report-ttl
incremental? update-lock-status
:write-locking-reports)]
(detach-daily-partitions "reports_historical" report-ttl
incremental? update-lock-status
:write-locking-reports)
stranded-reports-tables (find-stranded-partitions "^reports_\\d\\d\\d\\d\\d\\d\\d\\dz$")]
;; Now we can delete the partitions with less intrusive locking.
(jdbc/with-db-transaction []
;; Nothing should acquire locks on the detached tables, but to be safe, acquire
Expand All @@ -1808,9 +1826,9 @@
;; force a resource-events GC. prior to partitioning, this would have happened
;; via a cascade when the report was deleted, but now we just drop whole tables
;; of resource events.
(drop-partition-tables! detached-resource-event-tables
(drop-partition-tables! (concat detached-resource-event-tables stranded-events-tables)
update-lock-status :write-locking-resource-events)
(drop-partition-tables! detached-report-tables
(drop-partition-tables! (concat detached-report-tables stranded-reports-tables)
update-lock-status :write-locking-reports))))))

;; A db version that is "allowed" but not supported is deprecated
Expand Down
28 changes: 25 additions & 3 deletions test/puppetlabs/puppetdb/cli/services_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
[puppetlabs.puppetdb.scf.partitioning
:refer [create-resource-events-partition
create-reports-partition
get-temporal-partitions]]
get-temporal-partitions]
:as part]
[puppetlabs.trapperkeeper.testutils.logging
:refer [with-log-output
logs-matching
Expand Down Expand Up @@ -592,7 +593,7 @@
(with-test-db
(let [config (-> (create-temp-config)
(assoc :database *db* :read-database *read-db*)
(assoc-in [:database :gc-interval] "0.01"))
(assoc-in [:database :gc-interval] "60"))
store-report #(sync-command-post (svc-utils/pdb-cmd-url)
example-certname
"store report"
Expand Down Expand Up @@ -662,7 +663,28 @@
(is (empty?
(jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table])))

(jdbc/do-commands "DELETE FROM reports"))))))))
(jdbc/do-commands "DELETE FROM reports")))

(testing "a detached partition that was not removed is cleaned up by gc"
(let [old-ts (-> 2 time/days time/ago)
partition-table (format "reports_%s"
(part/date-suffix (part/to-zoned-date-time (time/to-timestamp old-ts))))]
(store-report (time/to-string old-ts))
(store-report (to-string (now)))

;; Strand the partition before calling GC
(jdbc/do-commands-outside-txn
(format "ALTER TABLE reports_historical DETACH PARTITION %s CONCURRENTLY" partition-table))

(svcs/sweep-reports! *db* {:incremental? false
:report-ttl (time/parse-period "1d")
:resource-events-ttl (time/parse-period "1d")
:db-lock-status db-lock-status})

(is (empty?
(jdbc/query ["SELECT tablename FROM pg_tables WHERE tablename = ?" partition-table])))

(jdbc/do-commands "DELETE FROM reports"))))))))

(deftest reports-analysis
;; For now, just test for the initial invocation
Expand Down

0 comments on commit 6780c59

Please sign in to comment.