Enumerated type and PostgreSQL



Prologue



, , , . , . , , . , - . : , , / in/out, - -, . PostgreSQL . .



( boolean)



. boolean. . -, , «», — «». -, boolean , , , . , XOR . -, . , , , .



, , "other", , , , , , . , . null boolean. null « », , "sex" , . "other" — , , «». sex boolean, .





Enum — PostgreSQL



PostgreSQL , , enum. :



CREATE TYPE sex AS ENUM ('', '', '');


:



select id from table where sex='';


, , , : "Cherchez la femme".



63 ( UTF-8, ). 4 . , , — . , . pg_enum, OID. , . . , syntax error, , .



, , enum. , ( ), (, ALTER TYPE). : 4 , , . , Open Source : « , ».



Char — PostgreSQL



, PostgreSQL , "char" . , char( ). "char" 1 , .. 4 , enum. UTF-8 , , — . , , - . , m, f, x. : , , , . . , domain .



CREATE DOMAIN sex_char AS "char" CHECK (VALUE in ('m','f','x'));
CREATE FUNCTION sex(txt varchar, OUT ch sex_char) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
    ch:= case txt
        when '' then 'm'::sex_char
        when '' then 'f'::sex_char
        when '' then 'x'::sex_char
        else null
        end;
    if ch is null then
        raise invalid_parameter_value;
    end if;
END
$sex$;
CREATE FUNCTION sex(ch sex_char, OUT txt varchar) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
    txt:= case ch
        when 'm'::sex_char then ''
        when 'f'::sex_char then ''
        when 'x'::sex_char then ''
        else null
        end;
    if txt is null then
        raise invalid_parameter_value;
    end if;
END
$sex$;


, , . :



=> select sex(ch=>'f');
 sex
---------
 
(1 row)
=> select sex(txt=>'');
 sex
-----
 f
(1 row)


( ) , , , . , , . , , :



=> select sex(sex(txt=>''));
sex
---------

(1 row)


:



select id from table where sex='f';
select id from table where sex=sex(txt=>'');


: 1 , .





.



create table sex_t (
   sex_t_id smallint primary key,
   sex varchar not null unique
);


. :



select id from table join sex_t using (sex_t_id) where sex='';


, enum. , enum : , ; , - , .. : 2 4 (.. , enum).





, . . , « », smallint serial. sequence 0 ( ), , ( ): , . 10.



, , . , , , .



«-», , , jsonb. , , jsonb, jsonb, .



, , , , , .







, , 75 % , 24 % 1 % . 23 , , 8 . , . , 1 . , , .



( , ), «» , . - , , . , , PostgreSQL , , .



10 000 000 , ( ). , index only scan. , count(*) count(id), .. , , .





, : MSI, 8 , 16 (hugepages 2 14 ), 0 swap. , , . CentOS 8, PostgreSQL 13 shared_buffers ( PostgreSQL) 14 .



100 , 100 , 10 000 . , .



postgresql.conf



postgresql.conf.



#   WAL      
wal_level = minimal
max_wal_senders = 0
#     ,   ""  .
random_page_cost = 1
#  
max_parallel_workers_per_gather=0
#  PostgreSQL
shared_buffers = 14GB


prewarm.sql



pg_prewarm.



select pg_prewarm('sex1');
select pg_prewarm('sex1_btree');
select pg_prewarm('sex2');
select pg_prewarm('sex2_btree');
select pg_prewarm('sex3');
select pg_prewarm('sex3_btree');
select pg_prewarm('sex4');
select pg_prewarm('sex4_btree');
select pg_prewarm('sex5');
select pg_prewarm('sex5_btree');
select pg_prewarm('sex5h');
select pg_prewarm('sex5h_hash');
select pg_prewarm('sex6');
select pg_prewarm('sex6_gin');
select pg_prewarm('sex6h');
select pg_prewarm('sex6h_gin_hash');


test.sql



. (pg_prewarm ). , count(id), index only scan.



select count(id) from sex1 where sex='';
select count(id) from sex1 where sex='';
select count(id) from sex1 where sex='';
select count(id) from sex2 where sex_char=sex(txt=>'');
select count(id) from sex2 where sex_char=sex(txt=>'');
select count(id) from sex2 where sex_char=sex(txt=>'');
select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
select count(id) from sex4 join sex_t4 using (sex_t4_id) where sex='';
select count(id) from sex4 join sex_t4 using (sex_t4_id) where sex='';
select count(id) from sex4 join sex_t4 using (sex_t4_id) where sex='';
select count(id) from sex4 where sex_t4_id=(select t.sex_t4_id from sex_t4 t where sex='');
select count(id) from sex4 where sex_t4_id=(select t.sex_t4_id from sex_t4 t where sex='');
select count(id) from sex4 where sex_t4_id=(select t.sex_t4_id from sex_t4 t where sex='');
select count(id) from sex5 where sex='';
select count(id) from sex5 where sex='';
select count(id) from sex5 where sex='';
select count(id) from sex5h where sex='';
select count(id) from sex5h where sex='';
select count(id) from sex5h where sex='';
select count(id) from sex6 where jdoc@>'{"sex":""}';
select count(id) from sex6 where jdoc@>'{"sex":""}';
select count(id) from sex6 where jdoc@>'{"sex":""}';
select count(id) from sex6h where jdoc@>'{"sex":""}';
select count(id) from sex6h where jdoc@>'{"sex":""}';
select count(id) from sex6h where jdoc@>'{"sex":""}';


init.sql



:



--  ,     
\set table_size 10000000

--  view       
create or replace view disk as SELECT n.nspname AS schema,
    c.relname,
    pg_size_pretty(pg_relation_size(c.oid::regclass)) AS size,
    pg_relation_size(c.oid::regclass)/1024 AS size_KiB
   FROM pg_class c
     LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
  ORDER BY (pg_relation_size(c.oid::regclass)) DESC
 LIMIT 20;

begin;

-- sex1  enum
CREATE TYPE sex_enum AS ENUM ('', '', '');
create table sex1 (id float, sex sex_enum not null);

-- sex2 "char"
CREATE DOMAIN sex_char AS "char" CHECK (VALUE in ('m','f','x'));
CREATE FUNCTION sex(txt varchar, OUT ch sex_char) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
    ch:= case txt
        when '' then 'm'::sex_char
        when '' then 'f'::sex_char
        when '' then 'x'::sex_char
        else null
        end;
    if ch is null then
        raise invalid_parameter_value;
    end if;
END
$sex$;
CREATE FUNCTION sex(ch sex_char, OUT txt varchar) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
    txt:= case ch
        when 'm'::sex_char then ''
        when 'f'::sex_char then ''
        when 'x'::sex_char then ''
        else null
        end;
    if txt is null then
        raise invalid_parameter_value;
    end if;
END
$sex$;

create table sex2 (id float, sex_char "char" not null);

-- sex3   c  smallint
create table sex_t (
  sex_t_id smallint primary key,
  sex varchar not null unique
);
insert into sex_t (sex_t_id,sex) values (1,''),(0,''),(-1,'');
create table sex3 (id float, sex_t_id smallint not null references sex_t);

-- sex4  serial,      ,  ,      
create table sex_t4 (
  sex_t4_id serial primary key,
  sex varchar not null unique
);
insert into sex_t4 (sex_t4_id,sex) values (1,''),(0,''),(-1,'');
create table sex4 (id float, sex_t4_id integer not null references sex_t4);

--  
create table sex_t5 (
  sex varchar primary key
);
insert into sex_t5 (sex) values (''),(''),('');
--  btree 
create table sex5 (id float, sex varchar not null references sex_t5);
--  hash 
create table sex5h (id float, sex varchar not null references sex_t5);

-- jsonb
--   gin 
create table sex6 (id float, jdoc jsonb not null);
--  gin       
create table sex6h (id float, jdoc jsonb not null);

--  
insert into sex1 (id,sex) select random, case when random<0.75 then ''::sex_enum when random<0.99 then ''::sex_enum else ''::sex_enum end from (select random() as random, generate_series(1,:table_size)) as subselect;
insert into sex5 (id,sex) select id,sex::varchar from sex1;
insert into sex2 (id,sex_char) select id,sex(sex) from sex5;
insert into sex3 (id,sex_t_id) select id,sex_t_id from sex5 join sex_t using (sex);
insert into sex4 (id,sex_t4_id) select id,sex_t4_id from sex5 join sex_t4 using (sex);
insert into sex5h (id,sex) select id,sex from sex5;
insert into sex6 (id,jdoc) select id,('{"sex": "'||sex||'"}')::jsonb from sex5;
insert into sex6h (id,jdoc) select id,jdoc from sex6;

--  
create index sex1_btree on sex1(sex);
create index sex2_btree on sex2(sex_char);
create index sex3_btree on sex3(sex_t_id);
create index sex4_btree on sex4(sex_t4_id);
create index sex5_btree on sex5(sex);
--    hash
create index sex5h_hash on sex5h using hash(sex);
create index sex6_gin on sex6 using gin(jdoc);
--  ,  , hash
create index sex6h_gin_hash on sex6h using gin(jdoc jsonb_path_ops);

commit;

set role postgres;

--    (  PostgreSQL)
create extension if not exists pg_prewarm;

--      
create extension if not exists pg_buffercache;
create or replace view cache as SELECT n.nspname AS schema,
    c.relname,
    pg_size_pretty(count(*) * 8192) AS buffered,
    count(*) * 8 AS buffered_KiB,
    round(100.0 * count(*)::numeric / ((( SELECT pg_settings.setting
           FROM pg_settings
          WHERE pg_settings.name = 'shared_buffers'::text))::integer)::numeric, 1) AS buffer_percent,
    round(100.0 * count(*)::numeric * 8192::numeric / pg_table_size(c.oid::regclass)::numeric, 1) AS percent_of_relation
   FROM pg_class c
     JOIN pg_buffercache b ON b.relfilenode = c.relfilenode
     JOIN pg_database d ON b.reldatabase = d.oid AND d.datname = current_database()
     LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
  GROUP BY c.oid, n.nspname, c.relname
  ORDER BY buffered_kib DESC
 LIMIT 20;

--  vacuum
vacuum freeze analyze;


test



:



#!/bin/sh
set -o errexit -o noclobber -o nounset -o pipefail
#set -o errexit -o noclobber -o nounset -o pipefail -o xtrace
# for pgbench
PATH="$PATH:/usr/pgsql-13/bin"
# config
# database connection parameters
readonly PGDATABASE='sex'
readonly PGPORT=5432
export PGDATABASE PGPORT
# output data file
readonly data_csv='data.csv'

# init data files
readonly header='sex:,male,female,other'

if [ ! -s "$data_csv" ]
then
    echo "$header" >|"$data_csv"
fi

# prewarm to the cache
psql --quiet -f prewarm.sql >/dev/null
# more prewarm
pgbench --no-vacuum --transaction 100 --file test.sql >/dev/null

for i in $(seq 1 100)
do
   echo -n "$i "
   date --iso-8601=seconds
    pgbench --no-vacuum --transaction 100 --report-latencies --file 'test.sql' | \
        awk "
            /from sex1 where sex='';\$/ {printf \"enum,%s,\", \$1 >>\"$data_csv\";}
            /from sex1 where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex1 where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex2 where sex_char=sex\(txt=>''\);\$/ {printf \"\\\"char\\\",%s,\", \$1 >>\"$data_csv\";}
            /from sex2 where sex_char=sex\(txt=>''\);\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex2 where sex_char=sex\(txt=>''\);\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex3 join sex_t using \(sex_t_id\) where sex='';\$/ {printf \"smallint(join),%s,\", \$1 >>\"$data_csv\";}
            /from sex3 join sex_t using \(sex_t_id\) where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex3 join sex_t using \(sex_t_id\) where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex3 where sex_t_id=\(select t.sex_t_id from sex_t t where sex=''\);\$/ {printf \"smallint(subsel),%s,\", \$1 >>\"$data_csv\";}
            /from sex3 where sex_t_id=\(select t.sex_t_id from sex_t t where sex=''\);\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex3 where sex_t_id=\(select t.sex_t_id from sex_t t where sex=''\);\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex4 join sex_t4 using \(sex_t4_id\) where sex='';\$/ {printf \"integer(join),%s,\", \$1 >>\"$data_csv\";}
            /from sex4 join sex_t4 using \(sex_t4_id\) where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex4 join sex_t4 using \(sex_t4_id\) where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex4 where sex_t4_id=\(select t.sex_t4_id from sex_t4 t where sex=''\);\$/ {printf \"integer(subsel),%s,\", \$1 >>\"$data_csv\";}
            /from sex4 where sex_t4_id=\(select t.sex_t4_id from sex_t4 t where sex=''\);\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex4 where sex_t4_id=\(select t.sex_t4_id from sex_t4 t where sex=''\);\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex5 where sex='';\$/ {printf \"varchar(btree),%s,\", \$1 >>\"$data_csv\";}
            /from sex5 where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex5 where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex5h where sex='';\$/ {printf \"varchar(hash),%s,\", \$1 >>\"$data_csv\";}
            /from sex5h where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex5h where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex6 where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"jsonb(gin),%s,\", \$1 >>\"$data_csv\";}
            /from sex6 where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex6 where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            /from sex6h where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"jsonb(gin+hash),%s,\", \$1 >>\"$data_csv\";}
            /from sex6h where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
            /from sex6h where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
            "
done
echo 'Done'




=> \dt+
                          List of relations
 Schema |  Name  | Type  | Owner | Persistence |  Size  | Description
--------+--------+-------+-------+-------------+--------+-------------
 public | sex1   | table | olleg | permanent   | 422 MB |
 public | sex2   | table | olleg | permanent   | 422 MB |
 public | sex3   | table | olleg | permanent   | 422 MB |
 public | sex4   | table | olleg | permanent   | 422 MB |
 public | sex5   | table | olleg | permanent   | 498 MB |
 public | sex5h  | table | olleg | permanent   | 498 MB |
 public | sex6   | table | olleg | permanent   | 651 MB |
 public | sex6h  | table | olleg | permanent   | 651 MB |
 public | sex_t  | table | olleg | permanent   | 48 kB  |
 public | sex_t4 | table | olleg | permanent   | 48 kB  |
 public | sex_t5 | table | olleg | permanent   | 48 kB  |
(11 rows)
 => \di+
                                   List of relations
 Schema |      Name      | Type  | Owner | Table  | Persistence |  Size  | Description
--------+----------------+-------+-------+--------+-------------+--------+-------------
 public | sex1_btree     | index | olleg | sex1   | permanent   | 66 MB  |
 public | sex2_btree     | index | olleg | sex2   | permanent   | 66 MB  |
 public | sex3_btree     | index | olleg | sex3   | permanent   | 66 MB  |
 public | sex4_btree     | index | olleg | sex4   | permanent   | 66 MB  |
 public | sex5_btree     | index | olleg | sex5   | permanent   | 67 MB  |
 public | sex5h_hash     | index | olleg | sex5h  | permanent   | 448 MB |
 public | sex6_gin       | index | olleg | sex6   | permanent   | 21 MB  |
 public | sex6h_gin_hash | index | olleg | sex6h  | permanent   | 10 MB  |
 public | sex_t4_pkey    | index | olleg | sex_t4 | permanent   | 16 kB  |
 public | sex_t4_sex_key | index | olleg | sex_t4 | permanent   | 16 kB  |
 public | sex_t5_pkey    | index | olleg | sex_t5 | permanent   | 16 kB  |
 public | sex_t_pkey     | index | olleg | sex_t  | permanent   | 16 kB  |
 public | sex_t_sex_key  | index | olleg | sex_t  | permanent   | 16 kB  |
(13 rows)


, 1 2 4 , . , - PostgreSQL «». , , . , , ( ) .



json , , . , , , - , json, , , .



hash-, , . hash- ( ) . , PostgreSQL hash- hash , . PostgreSQL, .



, gin ( btree). , , . - , gin- , , .





75% , , . 24% , . 1% .



M$ Exel « » (, , ). , « » , , () , .



75%24%1%



json . :



enum "char"



  • , , "char" . , , PostgreSQL «». "char" , enum.
  • , 75% , 24% 1% — .
  • , enum integer () . , - : , 75% , integer , 75% enum ; 24% 1% enum , select join, select .


:



=> explain (costs false) select count(id) from sex1 where sex='';
                   QUERY PLAN
-------------------------------------------------
 Aggregate
   ->  Index Scan using sex1_btree on sex1
         Index Cond: (sex = ''::sex_enum)
(3 rows)
=> explain (costs false) select count(id) from sex2 where sex_char=sex(txt=>'');
                  QUERY PLAN
----------------------------------------------
 Aggregate
   ->  Index Scan using sex2_btree on sex2
         Index Cond: (sex_char = 'f'::"char")
(3 rows)


smallint integer



  • smallint integer (serial) . , , PostgreSQL - .
  • enum "char" 75% , . , , , . 1% 24% .
  • join (Nested Loop) - , . , , . .. : Nested Loop. , Nested Loop .


75% , , . . 1% 24% .



=> explain (costs false) select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
                      QUERY PLAN
-------------------------------------------------------
 Aggregate
   ->  Nested Loop
         ->  Seq Scan on sex_t
               Filter: ((sex)::text = ''::text)
         ->  Index Scan using sex3_btree on sex3
               Index Cond: (sex_t_id = sex_t.sex_t_id)
(6 rows)
=> explain (costs false) select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
                    QUERY PLAN
---------------------------------------------------
 Aggregate
   InitPlan 1 (returns $0)
     ->  Seq Scan on sex_t t
           Filter: ((sex)::text = ''::text)
   ->  Index Scan using sex3_btree on sex3
         Index Cond: (sex_t_id = $0)
(6 rows)


varchar



  • , , : 75% , 1% 24% — .
  • btree- , join, . .
  • Hash- , btree ( ). , : , hash- . , tuple ID «» hash-. 1, 2 3, .. . , - hash- PostgreSQL, - hash-.


btree hash-.



=> explain (costs false) select count(id) from sex5 where sex='';
                     QUERY PLAN
-----------------------------------------------------
 Aggregate
   ->  Index Scan using sex5_btree on sex5
         Index Cond: ((sex)::text = ''::text)
(3 rows)
=> explain (costs false) select count(id) from sex5h where sex='';
                     QUERY PLAN
-----------------------------------------------------
 Aggregate
   ->  Index Scan using sex5h_hash on sex5h
         Index Cond: ((sex)::text = ''::text)
(3 rows)


json



  • 75% . , , 75%. json? PostgreSQL . 1% 24% PostgreSQL .
  • ( jsonb_path_ops) ( 1% — , ), gin json.
  • , , json — .


=> explain (costs false) select count(id) from sex6 where jdoc@>'{"sex":""}';
                      QUERY PLAN
-------------------------------------------------------
 Aggregate
   ->  Seq Scan on sex6
         Filter: (jdoc @> '{"sex": ""}'::jsonb)
(3 rows)
=> explain (costs false) select count(id) from sex6 where jdoc@>'{"sex":""}';
                           QUERY PLAN
-----------------------------------------------------------------
 Aggregate
   ->  Bitmap Heap Scan on sex6
         Recheck Cond: (jdoc @> '{"sex": ""}'::jsonb)
         ->  Bitmap Index Scan on sex6_gin
               Index Cond: (jdoc @> '{"sex": ""}'::jsonb)
(5 rows)




, , enum — , , . , , , 4 enum , , 1 "char" 2 smallint , .




All Articles