We conclude a mini-series on working with aggregates in PostgreSQL:
Today we'll look at how to reduce the total latency of writing many changes to aggregate tables by using staging tables and external processing.
We'll use the collector of our PostgreSQL log analysis service as the example; I have already described it in previous articles:
For any further work with the aggregate table, it is always better to have a single copy of each record in it rather than to overwrite that record many times. That leaves us two options: intermediate aggregation in the memory of the process, and a separate "rolling up" of the accumulated changes.
( "10 +1", "1 +10"), .
", !.."
, / ( ) . :
, .
, COPY
, "" + , INSERT ON CONFLICT ... DO UPDATE
. , - , UPDATE
! ""?..
RowExclusiveLock
xmax
WAL- heap
- , HOT update
- "" "" . UPDATE
, ...
-
, - , WAL-, UPDATE' - "" INSERT
... !
:
-- Staging table for not-yet-aggregated rows: UNLOGGED, with its column
-- definitions copied from agg.
CREATE UNLOGGED TABLE px$agg (LIKE agg);
UNLOGGED
, "DBA: ".
- .
-
- - , - . , , - , .
-, " pull-".
10 - (, Seq Scan
, "" ), PostgreSQL , .
, , "SQL HowTo: 1000 ".
, , - TRUNCATE
:
-- Fold the staging table px$agg into agg: one aggregated row per primary key,
-- then empty the staging table, all inside a single transaction.
-- The "..." lines and <aggfunc> are template placeholders, not literal SQL.
BEGIN;
-- NOTE(review): no column list is given, so the SELECT output must line up
-- with agg's column order — confirm, or add an explicit column list.
INSERT INTO agg
SELECT
pk1
...
, pkN
, <aggfunc>(val1) -- sum/min/max/...
, <aggfunc>(val2)
...
FROM
px$agg -- Seq Scan
GROUP BY -- PK = (pk1, ..., pkN)
pk1
...
, pkN;
-- Clear the staging table in the same transaction as the INSERT above.
-- NOTE(review): rows committed to px$agg by other sessions after the SELECT
-- took its snapshot but before TRUNCATE gets its lock would presumably be
-- removed without ever being aggregated — confirm whether writers are paused
-- or the table is locked before this transaction starts.
TRUNCATE px$agg;
COMMIT;
, , , TRUNCATE
, - .
, -, - - . , .
, , LOCK
, , - .
... !
-- Swap the staging table px$agg with the spare table px$agg_ by renaming,
-- using px$agg_swap as a temporary name during the exchange.
BEGIN;
SET LOCAL lock_timeout = '100ms'; -- give up if a lock is not granted within 100 ms (applies to this transaction only)
LOCK TABLE px$agg IN ACCESS EXCLUSIVE MODE; -- take the exclusive table lock up front, before any rename
ALTER TABLE px$agg RENAME TO px$agg_swap; -- move the current staging table aside
ALTER TABLE px$agg_ RENAME TO px$agg; -- the spare table takes over the px$agg name
ALTER TABLE px$agg_swap RENAME TO px$agg_; -- the former staging table becomes the spare
COMMIT;
, . - , .
, RENAME
, - :
px -> px0, px1 -> px
px -> px1, px0 -> px
- :
10% , .