-
Notifications
You must be signed in to change notification settings - Fork 1
/
partitioning_mods.sql
155 lines (113 loc) · 10.7 KB
/
partitioning_mods.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
-- NOTE: YOU [GENERALLY] DON'T PARTITION POSTGRESQL FOR PERFORMANCE. The partition helps things like autovacuum, backup, and restore.
-- If you don't need to partition for operational needs, you will almost certainly run Senzing faster without it.
--
-- RES_FEAT_EKEY
--
CREATE TABLE RES_FEAT_EKEY_NEW (RES_ENT_ID BIGINT NOT NULL, ECLASS_ID smallint NOT NULL, LENS_ID smallint NOT NULL, LIB_FEAT_ID BIGINT NOT NULL, FTYPE_ID smallint NOT NULL, UTYPE_CODE VARCHAR(50) NOT NULL, SUPPRESSED CHAR(1), USED_FROM_DT TIMESTAMP, USED_THRU_DT TIMESTAMP, FIRST_SEEN_DT TIMESTAMP, LAST_SEEN_DT TIMESTAMP) PARTITION BY HASH(RES_ENT_ID);
CREATE TABLE RES_FEAT_EKEY_0_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 0) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_1_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 1) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_2_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 2) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_3_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 3) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_4_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 4) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_5_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 5) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_6_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 6) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_EKEY_7_NEW PARTITION OF RES_FEAT_EKEY_NEW FOR VALUES WITH (modulus 8, remainder 7) WITH (FILLFACTOR=90);
COPY RES_FEAT_EKEY TO '/var/lib/postgresql/data/rfe.csv' DELIMITER ',';
-- COPY (SELECT * FROM RES_FEAT_EKEY) TO '/var/lib/postgresql/data/rfe.csv' DELIMITER ','; -- If partitioned already
COPY RES_FEAT_EKEY_NEW FROM '/var/lib/postgresql/data/rfe.csv' DELIMITER ',';
-- note that the default schema has this as a unique index but it is just the same fields in a different order from the primary key so it does not need to be unique
-- CREATE INDEX RES_FEAT_EKEY_SK ON RES_FEAT_EKEY(RES_ENT_ID, LENS_ID, LIB_FEAT_ID, UTYPE_CODE) ;
-- Includes are helpful if you are doing a lot of read-only queries. Otherwise it causes a log of extra writing in loads.
CREATE INDEX RES_FEAT_EKEY_SK2 ON RES_FEAT_EKEY_NEW (RES_ENT_ID); -- INCLUDE (LIB_FEAT_ID,FTYPE_ID,UTYPE_CODE,USED_FROM_DT,USED_THRU_DT,FIRST_SEEN_DT,LAST_SEEN_DT,SUPPRESSED);
CREATE UNIQUE INDEX RES_FEAT_EKEY_PK2 ON RES_FEAT_EKEY_NEW (LIB_FEAT_ID, RES_ENT_ID, UTYPE_CODE);
-- DANGER: DROP TABLE RES_FEAT_EKEY;
ALTER TABLE RES_FEAT_EKEY_NEW RENAME TO RES_FEAT_EKEY;
--
-- RES_FEAT_STAT
--
CREATE TABLE RES_FEAT_STAT_NEW (LENS_ID smallint NOT NULL, LIB_FEAT_ID BIGINT NOT NULL, ECLASS_ID smallint NOT NULL, NUM_RES_ENT int NOT NULL, NUM_RES_ENT_OOM int NOT NULL, CANDIDATE_CAP_REACHED CHAR(1) DEFAULT 'N' NOT NULL, SCORING_CAP_REACHED CHAR(1) DEFAULT 'N' NOT NULL, PRIMARY KEY(LIB_FEAT_ID)) PARTITION BY HASH(LIB_FEAT_ID);
CREATE TABLE RES_FEAT_STAT_0_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 0) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_1_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 1) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_2_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 2) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_3_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 3) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_4_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 4) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_5_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 5) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_6_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 6) WITH (FILLFACTOR=90);
CREATE TABLE RES_FEAT_STAT_7_NEW PARTITION OF RES_FEAT_STAT_NEW FOR VALUES WITH (modulus 8, remainder 7) WITH (FILLFACTOR=90);
INSERT INTO RES_FEAT_STAT_NEW SELECT * FROM RES_FEAT_STAT;
-- DANGER: DROP TABLE RES_FEAT_STAT;
ALTER TABLE RES_FEAT_STAT_NEW RENAME TO RES_FEAT_STAT;
--
-- RES_ENT
--
CREATE TABLE RES_ENT_NEW (RES_ENT_ID BIGINT NOT NULL, LENS_ID smallint NOT NULL, ECLASS_ID smallint NOT NULL, INTEREST_LEVEL smallint, CONFUSION_LEVEL smallint, NUM_OBS_ENT int, FIRST_SEEN_DT TIMESTAMP, LAST_SEEN_DT TIMESTAMP, LAST_TOUCH_DT BIGINT, LOCKING_ID BIGINT NOT NULL, NODE_NAME VARCHAR(50) NOT NULL, LOCK_DSRC_ACTION CHAR(1), PRIMARY KEY(RES_ENT_ID)) PARTITION BY HASH(RES_ENT_ID);
CREATE TABLE RES_ENT_0 PARTITION OF RES_ENT_NEW FOR VALUES WITH (modulus 4, remainder 0) WITH (FILLFACTOR=90);
CREATE TABLE RES_ENT_1 PARTITION OF RES_ENT_NEW FOR VALUES WITH (modulus 4, remainder 1) WITH (FILLFACTOR=90);
CREATE TABLE RES_ENT_2 PARTITION OF RES_ENT_NEW FOR VALUES WITH (modulus 4, remainder 2) WITH (FILLFACTOR=90);
CREATE TABLE RES_ENT_3 PARTITION OF RES_ENT_NEW FOR VALUES WITH (modulus 4, remainder 3) WITH (FILLFACTOR=90);
INSERT INTO RES_ENT_NEW SELECT * FROM RES_ENT;
-- DANGER: DROP TABLE RES_ENT;
ALTER TABLE RES_ENT_NEW RENAME TO RES_ENT;
--
-- DSRC_RECORD
--
CREATE TABLE DSRC_RECORD_NEW (DSRC_ID smallint NOT NULL, RECORD_ID VARCHAR(250) NOT NULL, ETYPE_ID smallint NOT NULL, ENT_SRC_KEY VARCHAR(250) NOT NULL, OBS_ENT_HASH CHAR(40) NOT NULL, JSON_DATA TEXT, CONFIG_ID BIGINT, FIRST_SEEN_DT TIMESTAMP, LAST_SEEN_DT TIMESTAMP, PRIMARY KEY(RECORD_ID, DSRC_ID)) PARTITION BY HASH(RECORD_ID);
CREATE TABLE DSRC_RECORD_0 PARTITION OF DSRC_RECORD_NEW FOR VALUES WITH (modulus 4, remainder 0) WITH (FILLFACTOR=90);
CREATE TABLE DSRC_RECORD_1 PARTITION OF DSRC_RECORD_NEW FOR VALUES WITH (modulus 4, remainder 1) WITH (FILLFACTOR=90);
CREATE TABLE DSRC_RECORD_2 PARTITION OF DSRC_RECORD_NEW FOR VALUES WITH (modulus 4, remainder 2) WITH (FILLFACTOR=90);
CREATE TABLE DSRC_RECORD_3 PARTITION OF DSRC_RECORD_NEW FOR VALUES WITH (modulus 4, remainder 3) WITH (FILLFACTOR=90);
CREATE INDEX DSRC_RECORD_SK_NEW ON DSRC_RECORD_NEW(ENT_SRC_KEY, DSRC_ID) ;
INSERT INTO DSRC_RECORD_NEW SELECT * FROM DSRC_RECORD;
-- DANGER: DROP TABLE DSRC_RECORD;
ALTER TABLE DSRC_RECORD_NEW RENAME TO DSRC_RECORD;
--
-- RES_REL_EKEY
--
CREATE TABLE RES_REL_EKEY_NEW (RES_ENT_ID BIGINT NOT NULL, LENS_ID smallint NOT NULL, REL_ENT_ID BIGINT NOT NULL, RES_REL_ID BIGINT NOT NULL, PRIMARY KEY(RES_ENT_ID, REL_ENT_ID)) PARTITION BY HASH(RES_ENT_ID);
CREATE TABLE RES_REL_EKEY_0 PARTITION OF RES_REL_EKEY_NEW FOR VALUES WITH (modulus 4, remainder 0) WITH (FILLFACTOR=100);
CREATE TABLE RES_REL_EKEY_1 PARTITION OF RES_REL_EKEY_NEW FOR VALUES WITH (modulus 4, remainder 1) WITH (FILLFACTOR=100);
CREATE TABLE RES_REL_EKEY_2 PARTITION OF RES_REL_EKEY_NEW FOR VALUES WITH (modulus 4, remainder 2) WITH (FILLFACTOR=100);
CREATE TABLE RES_REL_EKEY_3 PARTITION OF RES_REL_EKEY_NEW FOR VALUES WITH (modulus 4, remainder 3) WITH (FILLFACTOR=100);
INSERT INTO RES_REL_EKEY_NEW SELECT * FROM RES_REL_EKEY;
-- DANGER: DROP TABLE RES_REL_EKEY;
ALTER TABLE RES_REL_EKEY_NEW RENAME TO RES_REL_EKEY;
--
-- RES_RELATE
--
CREATE TABLE RES_RELATE_NEW (RES_REL_ID BIGINT NOT NULL, LENS_ID smallint NOT NULL, MIN_RES_ENT_ID BIGINT NOT NULL, MAX_RES_ENT_ID BIGINT NOT NULL, REL_STRENGTH smallint, REL_STATUS smallint, IS_DISCLOSED smallint, IS_AMBIGUOUS smallint, INTEREST_LEVEL smallint, CONFUSION_LEVEL smallint, LAST_ER_ID BIGINT, LAST_REF_SCORE smallint, LAST_ERRULE_ID smallint, MATCH_KEY TEXT, MATCH_LEVELS VARCHAR(50), FIRST_SEEN_DT TIMESTAMP, LAST_SEEN_DT TIMESTAMP, PRIMARY KEY(RES_REL_ID)) PARTITION BY HASH(RES_REL_ID);
CREATE TABLE RES_RELATE_0 PARTITION OF RES_RELATE_NEW FOR VALUES WITH (modulus 4, remainder 0) WITH (FILLFACTOR=90);
CREATE TABLE RES_RELATE_1 PARTITION OF RES_RELATE_NEW FOR VALUES WITH (modulus 4, remainder 1) WITH (FILLFACTOR=90);
CREATE TABLE RES_RELATE_2 PARTITION OF RES_RELATE_NEW FOR VALUES WITH (modulus 4, remainder 2) WITH (FILLFACTOR=90);
CREATE TABLE RES_RELATE_3 PARTITION OF RES_RELATE_NEW FOR VALUES WITH (modulus 4, remainder 3) WITH (FILLFACTOR=90);
INSERT INTO RES_RELATE_NEW SELECT * FROM RES_RELATE;
-- DANGER: DROP TABLE RES_RELATE;
ALTER TABLE RES_RELATE_NEW RENAME TO RES_RELATE;
--
-- OBS_ENT
--
CREATE TABLE OBS_ENT_NEW (OBS_ENT_ID BIGINT NOT NULL, ETYPE_ID smallint NOT NULL, DSRC_ID smallint NOT NULL, ENT_SRC_KEY VARCHAR(250) NOT NULL, ENT_SRC_DESC VARCHAR(250), FROM_LENS_ID smallint, FROM_RES_ENT_ID BIGINT, FIRST_SEEN_DT TIMESTAMP, LAST_SEEN_DT TIMESTAMP, LAST_TOUCH_DT BIGINT, LOCKING_ID BIGINT NOT NULL, NODE_NAME VARCHAR(50) NOT NULL, LOCK_DSRC_ACTION CHAR(1), FEATURES TEXT) PARTITION BY HASH(ENT_SRC_KEY) ;
CREATE TABLE OBS_ENT_0_NEW PARTITION OF OBS_ENT_NEW FOR VALUES WITH (modulus 4, remainder 0) WITH (FILLFACTOR=75);
CREATE TABLE OBS_ENT_1_NEW PARTITION OF OBS_ENT_NEW FOR VALUES WITH (modulus 4, remainder 1) WITH (FILLFACTOR=75);
CREATE TABLE OBS_ENT_2_NEW PARTITION OF OBS_ENT_NEW FOR VALUES WITH (modulus 4, remainder 2) WITH (FILLFACTOR=75);
CREATE TABLE OBS_ENT_3_NEW PARTITION OF OBS_ENT_NEW FOR VALUES WITH (modulus 4, remainder 3) WITH (FILLFACTOR=75);
COPY OBS_ENT TO '/var/lib/postgresql/data/oe.csv' DELIMITER ',';
-- COPY (SELECT * FROM OBS_ENT) TO '/var/lib/postgresql/data/oe.csv' DELIMITER ','; -- If partitioned already
COPY OBS_ENT_NEW FROM '/var/lib/postgresql/data/oe.csv' DELIMITER ',';
CREATE UNIQUE INDEX OBS_ENT_NEW_PK ON OBS_ENT_NEW(ENT_SRC_KEY,DSRC_ID);
-- note this is the primary key on the default schema. we have to choose which index is unique and the ENT_SRC_KEY must be the one programmatically.
CREATE INDEX OBS_ENT_NEW_SK ON OBS_ENT_NEW(OBS_ENT_ID);
-- DANGER: DROP TABLE OBS_ENT;
ALTER TABLE OBS_ENT_NEW RENAME TO OBS_ENT;
CREATE TABLE LIB_FEAT_NEW (LIB_FEAT_ID BIGINT NOT NULL, FTYPE_ID smallint NOT NULL, FEAT_HASH CHAR(40) NOT NULL, FEAT_DESC VARCHAR(150), FELEM_VALUES TEXT NOT NULL, ANONYMIZED VARCHAR(5) NOT NULL, VERSION smallint NOT NULL) PARTITION BY HASH(FEAT_HASH);
CREATE TABLE LIB_FEAT_0_NEW PARTITION OF LIB_FEAT_NEW FOR VALUES WITH (modulus 4, remainder 0) WITH (FILLFACTOR=100);
CREATE TABLE LIB_FEAT_1_NEW PARTITION OF LIB_FEAT_NEW FOR VALUES WITH (modulus 4, remainder 1) WITH (FILLFACTOR=100);
CREATE TABLE LIB_FEAT_2_NEW PARTITION OF LIB_FEAT_NEW FOR VALUES WITH (modulus 4, remainder 2) WITH (FILLFACTOR=100);
CREATE TABLE LIB_FEAT_3_NEW PARTITION OF LIB_FEAT_NEW FOR VALUES WITH (modulus 4, remainder 3) WITH (FILLFACTOR=100);
COPY LIB_FEAT TO '/var/lib/postgresql/data/lf.csv' DELIMITER ',';
COPY LIB_FEAT_NEW FROM '/var/lib/postgresql/data/lf.csv' DELIMITER ',';
CREATE INDEX LIB_FEAT_SK_NEW ON LIB_FEAT_NEW(LIB_FEAT_ID) ;
CREATE UNIQUE INDEX LIB_FEAT_PK_NEW ON LIB_FEAT_NEW(FEAT_HASH,FTYPE_ID,ANONYMIZED) ;
-- DANGER: DROP TABLE LIB_FEAT;
ALTER TABLE LIB_FEAT_NEW RENAME TO LIB_FEAT;
ALTER TABLE RES_ENT_OKEY SET (FILLFACTOR=75);
-- grant all on all tables in schema "schema_name" to user