citus11 管理手册
环境介绍
版本信息
-
centos7
-
postgres 14.4
-
citus 110-2
安装步骤
-
安装
略 -
配置
sudo pg_conftool 14 main set wal_level logical
sudo pg_conftool 14 main set listen_addresses '*'
sudo pg_conftool 14 main set shared_preload_libraries citus
sudo vi /etc/postgresql/14/main/pg_hba.conf
-
创建数据库免密码登录
-- Edit .pgpass in the postgres user’s home directory, hostname:port:database:username:password
节点信息
| nodenanme | IP | role |
|---|---|---|
| master01 | 10.10.20.11 | coordinator |
| worker01 | 10.10.2.12 | worker |
| worker02 | 10.10.2.14 | worker |
搭建集群
创建database&extension
在每个worker节点上执行
CREATE DATABASE newbie;
\c newbie
CREATE EXTENSION citus;
在master接节点上执行
CREATE DATABASE newbie;
\c newbie
CREATE EXTENSION citus;
SELECT citus_set_coordinator_host('10.10.2.11', 5432);
#添加worker节点
SELECT * from citus_add_node('10.10.2.12', 5432);
SELECT * from citus_add_node('10.10.2.14', 5432);
-- ... for all of them
查看集群节点
SELECT * FROM citus_get_active_worker_nodes();
node_name | node_port
------------+-----------
10.10.2.12 | 5432
10.10.2.14 | 5432
(2 rows)
查看节点分表
select * from pg_dist_node;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
--------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------
1 | 0 | 10.10.2.11 | 5432 | default | t | t | primary | default | t | f
3 | 2 | 10.10.2.12 | 5432 | default | t | t | primary | default | t | t
4 | 3 | 10.10.2.14 | 5432 | default | t | t | primary | default | t | t
表管理
表类型
- 本地表
- 参考表
- 分布表
本地表
与传统表使用方式一致,数据只存放在master节点
参考表
每个节点(master , worker)包含一份表的所有数据,对表的DML采用2pc。适用于存放业务元数据,便于与分布表联合查询使用
分布表
根据分布键(通常为表的指定列),将数据分布到每个worker节点中。每个worker节点包含表的部分数据内容
查看表信息
-- 包括参考表和分布表
select * from citus_tables ;
-- 包括所有类型的表及分布信息
select * from citus_shards ;
--- 查看库分布表size
SELECT logicalrelid AS name,
pg_size_pretty(citus_table_size(logicalrelid)) AS size
FROM pg_dist_partition where name = '$tablename';
--- 查看分布表分布在每个node上的size
select table_name, nodename as node_name,round(sum(shard_size)*100.0/citus_table_size(table_name),2) percent, pg_size_pretty(sum(shard_size)) as table_size_node,pg_size_pretty(citus_table_size(table_name)) AS table_size from citus_shards where citus_table_type = 'distributed' group by nodename , table_name;
table_name | node_name | percent | table_size_node | table_size
-------------------------------+------------+---------+-----------------+------------
mydistable | 10.10.2.12 | 32.60 | 1424 kB | 4368 kB
mydistable | 10.10.2.14 | 60.81 | 2656 kB | 4368 kB
(2 rows)
表管理
参考表创建
SELECT create_reference_table('tablename');
分布表创建
SELECT create_distributed_table('tablename', 'column');
注意事项
distributed relations cannot have UNIQUE, EXCLUDE, or PRIMARY KEY constraints that do not include the partition column (with an equality operator if EXCLUDE)
亲和性
SELECT create_distributed_table('github_events', 'repo_id',
colocate_with => 'github_repo');
更新表亲和性
SELECT update_distributed_table_colocation('A', colocate_with => 'B');
分片数量
show citus.shard_count;
set citus.shard_count = 64;
查看默认分布策略
SELECT * FROM pg_dist_rebalance_strategy;
设置分布策略
SELECT citus_set_default_rebalance_strategy('by_disk_size');
进度查看
SELECT * FROM get_rebalance_progress();
删除本地数据
-- 在将普通表转化化为分布表或参考表后,清空本地数据,待测试
SELECT truncate_local_data_after_distributing_table('tablename')
恢复表为本地表
select undistribute_table('table_name')
select undistribute_table('table_name',cascade_via_foreign_keys=>true); # 危险操作,注意所有表关联关系
函数管理
自定义函数
CREATE OR REPLACE FUNCTION
delete_campaign(company_id int, campaign_id int)
RETURNS void LANGUAGE plpgsql AS $fn$
BEGIN
DELETE FROM campaigns
WHERE id = $2 AND campaigns.company_id = $1;
DELETE FROM ads
WHERE ads.campaign_id = $2 AND ads.company_id = $1;
END;
$fn$;
自定义函数下推
SELECT create_distributed_function(
'delete_campaign(int, int)', 'company_id',
colocate_with := 'campaigns'
);
查看执行计划,查看全部的task
SET citus.explain_all_tasks = 1;
高级特性
重新分布
加入删除节点时,不停服数据迁移
rebalance_table_shards() #所有
rebalance_table_shards('tabename') #一个表
租户隔离
大租户单独分配,独享worker资源
创建分配
-- 根据租户ID隔离的分片
-- 返回新的shard id。
SELECT isolate_tenant_to_new_shard('table_name', tenant_id);
SELECT isolate_tenant_to_new_shard('table_name', tenant_id,'CASCADE');
│ isolate_tenant_to_new_shard │
├─────────────────────────────┤
│ 102240 │
迁移分片
SELECT nodename, nodeport
FROM citus_shards
WHERE shardid = 102240;
-- 列出可能持有该分片的可用工作节点
SELECT * FROM master_get_active_worker_nodes();
-- 将分片移动到你选择的WORK节点上
--(它也会移动任何用CASCADE选项创建的分片)。
SELECT citus_move_shard_placement(
102240,
'source_host', source_port,
'dest_host', dest_port);
时序数据分表管理
-- 自动创建分区表
SELECT create_time_partitions(
table_name := 'github_events',
partition_interval := '1 month',
end_at := now() + '12 months'
);
-- 查看分区表
SELECT partition
FROM time_partitions
WHERE parent_table = 'github_events'::regclass;
归档数据列存
--转化为列存
CALL alter_old_partitions_set_access_method(
'github_columnar_events',
'2015-01-01 06:00:00' /* older_than */,
'columnar'
);
-- 查看表的存储方式
SELECT partition, access_method
FROM time_partitions
WHERE parent_table = 'github_columnar_events'::regclass;
读写分离
-- 加入数据库从节点
select * from citus_add_secondary_node('new-node', 12345, 'primary-node', 12345);
-- 开启读写分离
citus.use_secondary_nodes
never: (default) All reads happen on primary nodes.
always: Reads run against secondary nodes instead, and insert/update statements are disabled.
节点管理
节点查看
select * from pg_dist_node;
nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards
--------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------
1 | 0 | 10.10.2.11 | 5432 | default | t | t | primary | default | t | f
3 | 2 | 10.10.2.12 | 5432 | default | t | t | primary | default | t | t
4 | 3 | 10.10.2.14 | 5432 | default | t | t | primary | default | t | t
删除节点
-- 删除一个节点
SELECT * from citus_drain_node('10.0.0.1', 5432);
-- 删除多个节点
#在每个节点上执行
SELECT * FROM citus_set_node_property(node_hostname, node_port, 'shouldhaveshards', false);
#
SELECT * FROM rebalance_table_shards(drain_only := true);
更新节点
select * from citus_update_node(123, 'new-address', 5432);
加入备用节点
select * from citus_add_inactive_node('new-node', 12345);
激活备用节点
select * from citus_activate_node('new-node', 12345);
集群健康管理
SELECT * FROM citus_check_cluster_node_health();
高可用管理
select * from citus_add_secondary_node('new-node', 12345, 'primary-node', 12345);
select * from citus_update_node(123, 'new-address', 5432);