#database

PostgreSQL

Major version upgrade

With Arch Linux everything moves fast, and when they decide to ship a new major version of Postgres, the next reboot turns into a drama.

How do you handle the crisis?

Simply with pg_upgrade:

# create a new postgres cluster
initdb -D /mnt/cryptdisk/Postgres/data --locale=en_US.UTF-8

# install the postgresql-old-upgrade package (the old binaries land in /opt/pgsql-11)
pacman -S postgresql-old-upgrade

# run the migration
pg_upgrade -b /opt/pgsql-11/bin -B /usr/bin -d /opt/postgres/data -D /mnt/cryptdisk/Postgres/data

# run the vacuum/analyze on the new cluster
./analyze_new_cluster.sh

# delete the old data
./delete_old_cluster.sh

You also need to update the systemd unit:

...
# in particular the PGROOT variable
Environment=PGROOT=/mnt/cryptdisk/Postgres
...
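
After editing the unit (or a drop-in created with systemctl edit), reload systemd and restart the service; a sketch, assuming the default Arch unit name:

systemctl edit postgresql.service    # or edit the unit file directly
systemctl daemon-reload
systemctl restart postgresql.service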

From now on, the latest version of Postgres is in use.

Compute statistics

VACUUM ANALYZE <table>;

Check when statistics were last collected

SELECT schemaname, relname, last_analyze
FROM pg_stat_all_tables
WHERE relname = 'city';
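
To inspect the per-column statistics themselves (here for the same city table as above), pg_stats can be queried; a minimal sketch:

SELECT attname, null_frac, n_distinct, most_common_vals
FROM pg_stats
WHERE tablename = 'city';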

Configure the cluster

This website is helpful
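
As a starting point, a minimal sketch of the parameters that are usually tuned first; the values below are assumptions for a small dedicated server, not recommendations:

# postgresql.conf (values are assumptions, adjust to the machine)
shared_buffers = 2GB            # ~25% of RAM
effective_cache_size = 6GB      # ~75% of RAM
work_mem = 32MB                 # per sort/hash operation
maintenance_work_mem = 512MB    # VACUUM, CREATE INDEX
max_connections = 100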

Install PostGIS

Source:

yum install postgis30_12.x86_64
yum install postgis30_12-utils.x86_64
-- Enable PostGIS (as of 3.0 contains just geometry/geography)
CREATE EXTENSION postgis;
-- enable raster support (for 3+)
CREATE EXTENSION postgis_raster;
-- Enable Topology
CREATE EXTENSION postgis_topology;
-- Enable PostGIS Advanced 3D
-- and other geoprocessing algorithms
-- sfcgal not available with all distributions
CREATE EXTENSION postgis_sfcgal;
-- fuzzy matching needed for Tiger
CREATE EXTENSION fuzzystrmatch;
-- rule based standardizer
CREATE EXTENSION address_standardizer;
-- example rule data set
CREATE EXTENSION address_standardizer_data_us;
-- Enable US Tiger Geocoder
CREATE EXTENSION postgis_tiger_geocoder;
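
To check that the extensions are active and which versions are installed (postgis_full_version() only exists once the postgis extension is created):

SELECT postgis_full_version();
SELECT extname, extversion FROM pg_extension;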

Pass variables to a script via psql

psql -v var1=foo -v var2=bar -f file.sql

You can pass both plain variables and variables embedded in string literals.

select *
from :var1
where <column> = :'var2';
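
For example, assuming the query above is saved as file.sql, a call with hypothetical values expands :var1 as-is and :'var2' as a quoted literal:

-- psql -v var1=city -v var2=Paris -f file.sql
-- the query is expanded to:
select * from city where <column> = 'Paris';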

Secure Dump script

#! /bin/sh
set -e
f=/srv/otherfs/thebackup
rm -f "$f.new" # Remove a previous, failed backup, if any
pg_dump -Fc -d ourdatabase >"$f.new"
pg_restore -l "$f.new" >/dev/null
mv "$f.new" "$f"
exit 0 # In case the previous line is a conditional like "if" or "&&" or "||".
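
To restore such a custom-format dump, a sketch reusing the names from the script above (--clean/--if-exists drop existing objects before recreating them):

pg_restore --clean --if-exists -d ourdatabase /srv/otherfs/thebackup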

Make a simple dump

pg_dump database > /tmp/db.sql

Load a simple dump

psql roundcube < /tmp/db.sql

Make an optimized dump

This script produces a binary (directory-format) dump using 4 threads:

#!/bin/bash
set -e
TMZ=$(date +"%Y%m%d-%H%M%S")

PG_VERSION=12.1
PG_HOST=<the host>
DATABASE=<the database>
PG_USER=<the user>
THREAD=4
PG_FOLDER=/usr/pgsql-$PG_VERSION/bin
DUMP_FOLDER=/dump/$DATABASE/

$PG_FOLDER/pg_dump -h $PG_HOST -p 5432 -U $PG_USER -w -Fd $DATABASE -n <schema1> -n <schema2> -j$THREAD -f $DUMP_FOLDER/
echo "create database $DATABASE" > $DUMP_FOLDER/create_database.sql
echo "\c $DATABASE" >> $DUMP_FOLDER/create_database.sql
$PG_FOLDER/pg_dumpall -h $PG_HOST -p 5432 -U $PG_USER -w --globals-only >> $DUMP_FOLDER/create_database.sql

Load an optimized dump

#!/bin/bash
set -e
TMZ=$(date +"%Y%m%d-%H%M%S")

PG_VERSION=12
PG_HOST=<the host>
DATABASE=<the db>
USER=<the user>
THREAD=4
PG_FOLDER=/usr/pgsql-$PG_VERSION/bin
DUMP_FOLDER=/tmp/$DATABASE/

$PG_FOLDER/psql -U $USER -W -h $PG_HOST -d postgres -f $DUMP_FOLDER/create_database.sql
$PG_FOLDER/pg_restore -Fd $DUMP_FOLDER -n <schema1> -n <schema2> -j$THREAD  -U $USER  -h  $PG_HOST -d $DATABASE

Corrupted table

psycopg2.errors.DataCorrupted: invalid page in block 2 of relation base/1687445/1688449

  1. SELECT oid, relname as damaged_table FROM pg_class WHERE oid=1688449;
  2. SET zero_damaged_pages = on;
  3. VACUUM FULL damaged_table;
  4. REINDEX TABLE damaged_table;

Note: zero_damaged_pages makes PostgreSQL zero out the corrupted pages during the VACUUM, so any rows on them are lost; take a file-level copy of the data directory first.


psqlrc

\set QUIET ON

\timing on
\pset pager 0
\x auto

-- Put a space in front of queries you don't want to save
\set HISTCONTROL ignorespace
\set HISTFILE ~/.psql_history- :DBNAME
\set HISTSIZE -1
\pset null 'NULL'
\pset border 2

\set QUIET OFF
\encoding unicode

\echo '\nCurrent Host Server Date Time : '`date` '\n'

\echo 'Administrative queries:\n'
\echo '\t\t\t:settings\t-- Server Settings'
\echo '\t\t\t:conninfo\t-- Server connections'
\echo '\t\t\t:activity\t-- Server activity'
\echo '\t\t\t:locks\t\t-- Lock info'
\echo '\t\t\t:waits\t\t-- Waiting queries'
\echo '\t\t\t:dbsize\t\t-- Database Size'
\echo '\t\t\t:tablesize\t-- Tables Size'
\echo '\t\t\t:uselesscol\t-- Useless columns'
\echo '\t\t\t:uptime\t\t-- Server uptime'
\echo '\t\t\t:menu\t\t-- Help Menu'
\echo '\t\t\t\\h\t\t-- Help with SQL commands'
\echo '\t\t\t\\?\t\t-- Help with psql commands\n'

\echo 'Development queries:\n'
\echo '\t\t\t:sp\t\t-- Current Search Path'
\echo '\t\t\t:clear\t\t-- Clear screen'
\echo '\t\t\t:ll\t\t-- List\n'

-- Administration queries

\set menu '\\i ~/.psqlrc'

\set settings 'select name, setting,unit,context from pg_settings;'

\set conninfo 'select usename, count(*) from pg_stat_activity group by usename;'

\set activity 'select datname, pid, usename, client_addr, substring(query, 1, 50) as query,  ( select array_agg(unnest) from (select distinct unnest(array_remove(array_remove(array_remove(array_remove(array_remove(array_remove(array_remove(regexp_matches(lower(query), ''\(from|into|join|update|delete\) +([A-z_\\."]+)'',''g''), NULL), ''from''), ''into''), ''update''), ''delete''),''set''), ''join''))  as unnest  ) as b) as tables, wait_event, wait_event_type, state_change from pg_stat_activity where state = \'active\' and query !~ \'pg_stat_activity\';'

\set activityQuery 'select  pid, usename, query from pg_stat_activity where state = \'active\' and query !~ \'pg_stat_activity\';'

\set waits 'SELECT pg_stat_activity.pid, pg_stat_activity.query, pg_stat_activity.wait_event_type, pg_stat_activity.wait_event, now() - pg_stat_activity.query_start AS \"totaltime\", pg_stat_activity.backend_start FROM pg_stat_activity WHERE pg_stat_activity.query !~ \'%IDLE%\'::text AND pg_stat_activity.wait_event IS NOT NULL;'

\set dbsize 'SELECT d.datname as Name, pg_catalog.pg_get_userbyid(d.datdba) as Owner, CASE WHEN pg_catalog.has_database_privilege(d.datname, \'CONNECT\') THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname)) ELSE \'No Access\' END as Size FROM pg_catalog.pg_database d order by CASE WHEN pg_catalog.has_database_privilege(d.datname, \'CONNECT\') THEN pg_catalog.pg_database_size(d.datname) ELSE NULL END desc LIMIT 20;'

\set tablesize ' SELECT relname AS "relation", pg_size_pretty(pg_total_relation_size(C.oid)) AS "total_size", pg_size_pretty(pg_indexes_size(C.oid)) "index_size", reltuples::bigint AS approximate_row_count FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) WHERE nspname NOT IN (\'pg_catalog\', \'information_schema\') AND C.relkind <> \'i\' AND nspname !~ \'^pg_toast\' ORDER BY pg_total_relation_size(C.oid) DESC LIMIT 50; '

\set uselesscol 'SELECT nspname, relname, attname, typname, (stanullfrac*100)::int AS null_percent, case when stadistinct >= 0 then stadistinct else abs(stadistinct)*reltuples end AS \"distinct\", case 1 when stakind1 then stavalues1 when stakind2 then stavalues2 end AS \"values\" FROM pg_class c JOIN pg_namespace ns ON (ns.oid=relnamespace) JOIN pg_attribute ON (c.oid=attrelid) JOIN pg_type t ON (t.oid=atttypid) JOIN pg_statistic ON (c.oid=starelid AND staattnum=attnum) WHERE nspname NOT LIKE E\'pg\\\\_%\' AND nspname != \'information_schema\' AND relkind=\'r\' AND NOT attisdropped AND attstattarget != 0 AND reltuples >= 100 AND stadistinct BETWEEN 0 AND 1 ORDER BY nspname, relname, attname;'

\set tmpfiles 'SELECT temp_files AS "Temporary files" , pg_size_pretty(temp_bytes) AS "Size of temporary files" FROM   pg_stat_database db order by 2 desc;'

\set uptime 'select now() - pg_postmaster_start_time() AS uptime;'

\set locks ' SELECT blocked_locks.pid     AS blocked_pid, blocked_activity.usename  AS blocked_user, blocking_locks.pid     AS blocking_pid, blocking_activity.usename AS blocking_user, substring(blocked_activity.query,1,50)    AS blocked_statement, substring(blocking_activity.query,1,50)   AS current_statement_in_blocking_process FROM  pg_catalog.pg_locks         blocked_locks JOIN pg_catalog.pg_stat_activity blocked_activity  ON blocked_activity.pid = blocked_locks.pid JOIN pg_catalog.pg_locks         blocking_locks ON blocking_locks.locktype = blocked_locks.locktype AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid AND blocking_locks.pid != blocked_locks.pid JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid; '

\set cleaning 'select relname,last_vacuum, last_autovacuum, last_analyze, last_autoanalyze from pg_stat_user_tables order by SUBSTRING(relname FROM ''([0-9]+)'')::BIGINT ASC;'

-- Development queries:

\set sp 'SHOW search_path;'
\set clear '\\! clear;'
\set ll '\\! ls -lrt;'

Show enum values

\dT+
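The same information is available in plain SQL, e.g. for use from an application; a sketch:

SELECT t.typname, e.enumlabel
FROM pg_type t
JOIN pg_enum e ON e.enumtypid = t.oid
ORDER BY t.typname, e.enumsortorder;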

Show cache hit ratio

A ratio close to 100% means shared_buffers is large enough for the working set.

SELECT
  datname, 100 * blks_hit / (blks_hit + blks_read) as cache_hit_ratio
FROM pg_stat_database WHERE (blks_hit + blks_read) > 0;
+-----------+-----------------+
|  datname  | cache_hit_ratio |
+-----------+-----------------+
| NULL      |              99 |
| codimd    |              99 |
| synapse   |              99 |
| wallabag  |              99 |
| postgres  |              99 |
| pleroma   |              99 |
| miniflux  |              99 |
| funkwhale |              99 |
| peertube  |              99 |
| bitwarden |              99 |
| mailu     |              99 |
| roundcube |              99 |
+-----------+-----------------+
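
The same ratio can be computed per table from pg_statio_user_tables; a sketch:

SELECT relname,
       100 * heap_blks_hit / NULLIF(heap_blks_hit + heap_blks_read, 0) AS cache_hit_ratio
FROM pg_statio_user_tables
ORDER BY heap_blks_hit + heap_blks_read DESC
LIMIT 20;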

Read only user

-- create
CREATE ROLE the_user WITH LOGIN PASSWORD 'Test1234'
NOSUPERUSER INHERIT NOCREATEDB NOCREATEROLE NOREPLICATION VALID UNTIL 'infinity';
\connect the_database; -- you must connect to the database itself, otherwise the grants below don't work
GRANT CONNECT ON DATABASE the_database TO the_user;
GRANT USAGE ON SCHEMA public TO the_user;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO the_user;
GRANT SELECT ON ALL SEQUENCES IN SCHEMA public TO the_user;
REVOKE CREATE ON SCHEMA public FROM PUBLIC;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO the_user;
ALTER USER the_user SET idle_in_transaction_session_timeout TO 60000; -- 1 minute

-- to drop the user
REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA public FROM the_user;
REVOKE ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public FROM the_user;
REVOKE ALL PRIVILEGES ON ALL FUNCTIONS IN SCHEMA public FROM the_user;
DROP OWNED BY the_user;
DROP role the_user;
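
To check the grants, a superuser can impersonate the role and try a write; some_table is a hypothetical table of the_database:

SET ROLE the_user;
SELECT count(*) FROM some_table;        -- allowed
INSERT INTO some_table DEFAULT VALUES;  -- should fail with "permission denied"
RESET ROLE;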


Query/query plan obfuscation

Natural sort (order by the numeric part of a string)

ORDER BY SUBSTRING(col_name FROM '([0-9]+)')::BIGINT ASC
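
A small self-contained illustration; VALUES provides hypothetical data so the expression can be tried without a real table:

SELECT name
FROM (VALUES ('file1'), ('file10'), ('file2')) AS t(name)
ORDER BY SUBSTRING(name FROM '([0-9]+)')::BIGINT ASC;
-- returns file1, file2, file10 instead of the lexicographic file1, file10, file2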

You should always enable logging in Postgres.
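
A minimal sketch of the logging settings to look at first; the threshold values are assumptions:

# postgresql.conf (logging; values are assumptions)
logging_collector = on
log_directory = 'log'
log_min_duration_statement = 500    # log statements slower than 500 ms
log_line_prefix = '%m [%p] %u@%d '
log_lock_waits = on
log_checkpoints = on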

Show create table

CREATE OR REPLACE FUNCTION public.generate_create_table_statement(p_table_name character varying)
  RETURNS SETOF text AS
$BODY$
DECLARE
    v_table_ddl   text;
    column_record record;
    table_rec record;
    constraint_rec record;
    firstrec boolean;
BEGIN
    FOR table_rec IN
        SELECT c.relname FROM pg_catalog.pg_class c
            LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                WHERE relkind = 'r'
                AND relname~ ('^('||p_table_name||')$')
                AND n.nspname <> 'pg_catalog'
                AND n.nspname <> 'information_schema'
                AND n.nspname !~ '^pg_toast'
                AND pg_catalog.pg_table_is_visible(c.oid)
          ORDER BY c.relname
    LOOP

        FOR column_record IN
            SELECT
                b.nspname as schema_name,
                b.relname as table_name,
                a.attname as column_name,
                pg_catalog.format_type(a.atttypid, a.atttypmod) as column_type,
                CASE WHEN
                    (SELECT substring(pg_catalog.pg_get_expr(d.adbin, d.adrelid) for 128)
                     FROM pg_catalog.pg_attrdef d
                     WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef) IS NOT NULL THEN
                    'DEFAULT '|| (SELECT substring(pg_catalog.pg_get_expr(d.adbin, d.adrelid) for 128)
                                  FROM pg_catalog.pg_attrdef d
                                  WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef)
                ELSE
                    ''
                END as column_default_value,
                CASE WHEN a.attnotnull = true THEN
                    'NOT NULL'
                ELSE
                    'NULL'
                END as column_not_null,
                a.attnum as attnum,
                e.max_attnum as max_attnum
            FROM
                pg_catalog.pg_attribute a
                INNER JOIN
                 (SELECT c.oid,
                    n.nspname,
                    c.relname
                  FROM pg_catalog.pg_class c
                       LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                  WHERE c.relname = table_rec.relname
                    AND pg_catalog.pg_table_is_visible(c.oid)
                  ORDER BY 2, 3) b
                ON a.attrelid = b.oid
                INNER JOIN
                 (SELECT
                      a.attrelid,
                      max(a.attnum) as max_attnum
                  FROM pg_catalog.pg_attribute a
                  WHERE a.attnum > 0
                    AND NOT a.attisdropped
                  GROUP BY a.attrelid) e
                ON a.attrelid=e.attrelid
            WHERE a.attnum > 0
              AND NOT a.attisdropped
            ORDER BY a.attnum
        LOOP
            IF column_record.attnum = 1 THEN
                v_table_ddl:='CREATE TABLE '||column_record.schema_name||'.'||column_record.table_name||' (';
            ELSE
                v_table_ddl:=v_table_ddl||',';
            END IF;

            IF column_record.attnum <= column_record.max_attnum THEN
                v_table_ddl:=v_table_ddl||chr(10)||
                         '    '||column_record.column_name||' '||column_record.column_type||' '||column_record.column_default_value||' '||column_record.column_not_null;
            END IF;
        END LOOP;

        firstrec := TRUE;
        FOR constraint_rec IN
            SELECT conname, pg_get_constraintdef(c.oid) as constrainddef
                FROM pg_constraint c
                    WHERE conrelid=(
                        SELECT attrelid FROM pg_attribute
                        WHERE attrelid = (
                            SELECT oid FROM pg_class WHERE relname = table_rec.relname
                        ) AND attname='tableoid'
                    )
        LOOP
            v_table_ddl:=v_table_ddl||','||chr(10);
            v_table_ddl:=v_table_ddl||'CONSTRAINT '||constraint_rec.conname;
            v_table_ddl:=v_table_ddl||chr(10)||'    '||constraint_rec.constrainddef;
            firstrec := FALSE;
        END LOOP;
        v_table_ddl:=v_table_ddl||');';
        RETURN NEXT v_table_ddl;
    END LOOP;
END;
$BODY$
  LANGUAGE plpgsql VOLATILE
  COST 100;

select generate_create_table_statement('my_table');

Reorder columns aka tetris

PostgreSQL pads each column to its type's alignment, so ordering columns from the widest fixed-size types to the narrowest minimizes padding and shrinks the on-disk row size. The query below lists columns with their alignment and size:

SELECT a.attname, t.typname, t.typalign, t.typlen 
FROM pg_class c 
JOIN pg_attribute a ON (a.attrelid = c.oid) 
JOIN pg_type t ON (t.oid = a.atttypid) 
WHERE c.relname = 'my_table' AND a.attnum >= 0 
ORDER BY t.typlen DESC
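
A quick way to see the effect of padding, using pg_column_size on row values (a hypothetical illustration, no table needed):

-- boolean/bigint interleaved vs. grouped: the interleaved row is larger due to alignment padding
SELECT pg_column_size(ROW(true, 1::bigint, true, 1::bigint)) AS interleaved,
       pg_column_size(ROW(1::bigint, 1::bigint, true, true)) AS grouped;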

https://medium.com/@rhlkmr089/in-house-data-lake-with-cdc-processing-hudi-docker-878cee483ca0

cdc

React ?
