环境介绍

版本信息

  • centos7

  • postgres 14.4

  • citus 110-2

安装步骤

  • 安装

  • 配置

sudo pg_conftool 14 main set wal_level logical
sudo pg_conftool 14 main set listen_addresses '*'
sudo pg_conftool 14 main set shared_preload_libraries citus
sudo vi /etc/postgresql/14/main/pg_hba.conf
  • 创建数据库免密码登录

    -- Edit .pgpass in the postgres user’s home directory, 
    hostname:port:database:username:password
    

节点信息

nodenanme IP role
master01 10.10.20.11 coordinator
worker01 10.10.2.12 worker
worker02 10.10.2.14 worker

搭建集群

创建database&extension

在每个worker节点上执行

CREATE DATABASE newbie;
\c newbie
CREATE EXTENSION citus;

在master接节点上执行

CREATE DATABASE newbie;
\c newbie
CREATE EXTENSION citus;
SELECT citus_set_coordinator_host('10.10.2.11', 5432);
#添加worker节点
SELECT * from citus_add_node('10.10.2.12', 5432);
SELECT * from citus_add_node('10.10.2.14', 5432);
-- ... for all of them

查看集群节点

SELECT * FROM citus_get_active_worker_nodes();
 node_name  | node_port 
------------+-----------
 10.10.2.12 |      5432
 10.10.2.14 |      5432
(2 rows)

查看节点分表

select * from pg_dist_node;
 nodeid | groupid |  nodename  | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards 
--------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------
      1 |       0 | 10.10.2.11 |     5432 | default  | t           | t        | primary  | default     | t              | f
      3 |       2 | 10.10.2.12 |     5432 | default  | t           | t        | primary  | default     | t              | t
      4 |       3 | 10.10.2.14 |     5432 | default  | t           | t        | primary  | default     | t              | t

表管理

表类型

  • 本地表
  • 参考表
  • 分布表

本地表

与传统表使用方式一致,数据只存放在master节点

参考表

每个节点(master , worker)包含一份表的所有数据,对表的DML采用2pc。适用于存放业务元数据,便于与分布表联合查询使用

分布表

根据分布键(通常为表的指定列),将数据分布到每个worker节点中。每个worker节点包含表的部分数据内容

查看表信息

-- 包括参考表和分布表
select * from citus_tables ;

-- 包括所有类型的表及分布信息
select * from citus_shards ;

--- 查看库分布表size
SELECT logicalrelid AS name,
       pg_size_pretty(citus_table_size(logicalrelid)) AS size
  FROM pg_dist_partition where name = '$tablename';

--- 查看分布表分布在每个node上的size 
select table_name, nodename as node_name,round(sum(shard_size)*100.0/citus_table_size(table_name),2) percent, pg_size_pretty(sum(shard_size)) as table_size_node,pg_size_pretty(citus_table_size(table_name)) AS table_size from citus_shards where citus_table_type = 'distributed' group by nodename , table_name;

          table_name           | node_name  | percent | table_size_node | table_size
-------------------------------+------------+---------+-----------------+------------
mydistable | 10.10.2.12 |   32.60 | 1424 kB         | 4368 kB
mydistable | 10.10.2.14 |   60.81 | 2656 kB         | 4368 kB
(2 rows)

表管理

参考表创建

SELECT create_reference_table('tablename');

分布表创建

SELECT create_distributed_table('tablename', 'column');

注意事项

distributed relations cannot have UNIQUE, EXCLUDE, or PRIMARY KEY constraints that do not include the partition column (with an equality operator if EXCLUDE)

亲和性

SELECT create_distributed_table('github_events', 'repo_id',
                                colocate_with => 'github_repo');

更新表亲和性

SELECT update_distributed_table_colocation('A', colocate_with => 'B');

分片数量

show citus.shard_count;
set citus.shard_count = 64;

查看默认分布策略

SELECT * FROM pg_dist_rebalance_strategy;

设置分布策略

SELECT citus_set_default_rebalance_strategy('by_disk_size');

进度查看

SELECT * FROM get_rebalance_progress();

删除本地数据

-- 在将普通表转化化为分布表或参考表后,清空本地数据,待测试
SELECT truncate_local_data_after_distributing_table('tablename')

恢复表为本地表

select undistribute_table('table_name')
select undistribute_table('table_name',cascade_via_foreign_keys=>true); # 危险操作,注意所有表关联关系

函数管理

自定义函数


CREATE OR REPLACE FUNCTION
  delete_campaign(company_id int, campaign_id int)
RETURNS void LANGUAGE plpgsql AS $fn$
BEGIN
  DELETE FROM campaigns
   WHERE id = $2 AND campaigns.company_id = $1;
  DELETE FROM ads
   WHERE ads.campaign_id = $2 AND ads.company_id = $1;
END;
$fn$;

自定义函数下推

SELECT create_distributed_function(
  'delete_campaign(int, int)', 'company_id',
  colocate_with := 'campaigns'
);

查看执行计划,查看全部的task

SET citus.explain_all_tasks = 1;

高级特性

重新分布

加入删除节点时,不停服数据迁移

rebalance_table_shards()  #所有
rebalance_table_shards('tabename') #一个表

租户隔离

大租户单独分配,独享worker资源

创建分配

-- 根据租户ID隔离的分片
-- 返回新的shard id。
SELECT isolate_tenant_to_new_shard('table_name', tenant_id);
SELECT isolate_tenant_to_new_shard('table_name', tenant_id,'CASCADE');
│ isolate_tenant_to_new_shard │
├─────────────────────────────┤
│ 102240 │

迁移分片

SELECT nodename, nodeport
  FROM citus_shards
 WHERE shardid = 102240;

--  列出可能持有该分片的可用工作节点
SELECT * FROM master_get_active_worker_nodes();

-- 将分片移动到你选择的WORK节点上
--(它也会移动任何用CASCADE选项创建的分片)。
SELECT citus_move_shard_placement(
  102240,
  'source_host', source_port,
  'dest_host', dest_port);

时序数据分表管理

-- 自动创建分区表
SELECT create_time_partitions(
  table_name         := 'github_events',
  partition_interval := '1 month',
  end_at             := now() + '12 months'
);
-- 查看分区表
SELECT partition
  FROM time_partitions
 WHERE parent_table = 'github_events'::regclass;

归档数据列存

--转化为列存
CALL alter_old_partitions_set_access_method(
  'github_columnar_events',
  '2015-01-01 06:00:00' /* older_than */,
  'columnar'
);
-- 查看表的存储方式
SELECT partition, access_method
  FROM time_partitions
 WHERE parent_table = 'github_columnar_events'::regclass;

读写分离

-- 加入数据库从节点
select * from citus_add_secondary_node('new-node', 12345, 'primary-node', 12345);
-- 开启读写分离
citus.use_secondary_nodes
    never: (default) All reads happen on primary nodes.
    always: Reads run against secondary nodes instead, and insert/update statements are disabled.

节点管理

节点查看

select * from pg_dist_node;
 nodeid | groupid |  nodename  | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards 
--------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------
      1 |       0 | 10.10.2.11 |     5432 | default  | t           | t        | primary  | default     | t              | f
      3 |       2 | 10.10.2.12 |     5432 | default  | t           | t        | primary  | default     | t              | t
      4 |       3 | 10.10.2.14 |     5432 | default  | t           | t        | primary  | default     | t              | t

删除节点

-- 删除一个节点
SELECT * from citus_drain_node('10.0.0.1', 5432);
-- 删除多个节点
#在每个节点上执行
SELECT * FROM citus_set_node_property(node_hostname, node_port, 'shouldhaveshards', false);
#
SELECT * FROM rebalance_table_shards(drain_only := true);

更新节点

select * from citus_update_node(123, 'new-address', 5432);

加入备用节点

select * from citus_add_inactive_node('new-node', 12345);

激活备用节点

select * from citus_activate_node('new-node', 12345);

集群健康管理

SELECT * FROM citus_check_cluster_node_health();

高可用管理

select * from citus_add_secondary_node('new-node', 12345, 'primary-node', 12345);
select * from citus_update_node(123, 'new-address', 5432);