基础环境
系统环境 | 软件版本 | IP地址 | FE | CN | CPU指令集 |
---|---|---|---|---|---|
Rocky Linux release 9.4 | StarRocks-3.3.0 | 192.168.8.198 | LEADER | CN | AVX2 |
Rocky Linux release 9.4 | StarRocks-3.3.0 | 192.168.8.195 | FOLLOWER | CN | AVX2 |
Rocky Linux release 9.4 | StarRocks-3.3.0 | 192.168.8.194 | FOLLOWER | CN | AVX2 |
Ubuntu 20.04 | minio:latest | 192.168.8.197 | none | none | none |
安装之前须查看系统CPU是否支持AVX2指令集
cat /proc/cpuinfo | grep avx2
注意:StarRocks 依靠 AVX2 指令集充分发挥其矢量化能力,因此 CPU 必须支持 AVX2 指令集,否则服务将无法正常工作。
环境配置
官网地址:https://docs.starrocks.io/zh/docs/deployment/environment_configurations/
JDK环境配置
dnf install java-11-openjdk-devel
echo "export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-3.el9.x86_64/" >> /etc/profile
echo 'export PATH=/usr/lib/jvm/java-11-openjdk-11.0.23.0.9-3.el9.x86_64/bin/:$PATH' >> /etc/profile
echo "export LANG=en_US.UTF8" >> /etc/profile #配置 LANG 变量
source /etc/profile
禁用Transparent Huge Pages
# 临时变更。
echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag
# 永久变更(写入后需执行 chmod +x /etc/rc.d/rc.local,rc.local 才会在开机时被执行)。
cat >> /etc/rc.d/rc.local << EOF
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
fi
if test -f /sys/kernel/mm/transparent_hugepage/defrag; then
echo madvise > /sys/kernel/mm/transparent_hugepage/defrag
fi
EOF
禁用 Swap Space
#临时关闭 Swap Space。
swapoff -a
#永久关闭 Swap Space
#从 /etc/fstab 文件中删除 Swap Space 信息。
[root@starrocks ~]# cat /etc/fstab
...........................................................................
#/dev/mapper/rl-swap none swap defaults 0 0
#重启机器后确认 Swap Space 已关闭。
[root@starrocks ~]# free -h
total used free shared buff/cache available
Mem: 15Gi 583Mi 11Gi 8.0Mi 3.9Gi 14Gi
Swap: 0B 0B 0B
禁用 SELinux
# 临时变更。
setenforce 0
# 永久变更。
sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
sed -i 's/SELINUXTYPE/#SELINUXTYPE/' /etc/selinux/config
关闭防火墙
systemctl stop firewalld.service && systemctl disable firewalld.service
内核参数优化
cat > /etc/sysctl.conf << EOF
vm.overcommit_memory = 1
vm.swappiness = 0
net.ipv4.tcp_abort_on_overflow=1
net.core.somaxconn=1024
vm.max_map_count = 262144
EOF
#使修改生效
sysctl -p
vm.max_map_count = 262144 #高并发配置
net.core.somaxconn=1024 #监听 Socket 队列的最大连接请求数为 1024
net.ipv4.tcp_abort_on_overflow=1 #网络配置
vm.swappiness=0 #禁用 Swappiness
时区配置
timedatectl set-timezone "Asia/Shanghai"
hwclock
时间同步配置
cat > /etc/chrony.conf << EOF
pool ntp1.aliyun.com iburst
pool ntp2.aliyun.com iburst
sourcedir /run/chrony-dhcp
driftfile /var/lib/chrony/drift
makestep 1.0 3
rtcsync
keyfile /etc/chrony.keys
ntsdumpdir /var/lib/chrony
leapsectz right/UTC
logdir /var/log/chrony
EOF
systemctl restart chronyd
ulimit 设置
cat >> /etc/security/limits.conf << EOF
* soft nproc 65535
* hard nproc 65535
* soft nofile 655350
* hard nofile 655350
* soft stack unlimited
* hard stack unlimited
* hard memlock unlimited
* soft memlock unlimited
EOF
cat >> /etc/security/limits.d/20-nproc.conf << EOF
* soft nproc 65535
root soft nproc 65535
EOF
其他(以下写入 /proc/sys 的设置仅临时生效,重启后会丢失;如需永久生效,请将 kernel.threads-max 和 kernel.pid_max 写入 /etc/sysctl.conf)
echo 120000 > /proc/sys/kernel/threads-max
echo 200000 > /proc/sys/kernel/pid_max
建议环境设置好后,重启下机器,然后再进行部署!!!
部署StarRocks
Minio
1.安装docker
apt install docker.io docker-compose -y
2.部署minio
cat > minio.yaml << EOF
version: "3"
services:
minio:
container_name: minio
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: Sec@2024
image: minio/minio:latest
ports:
- "9001:9001"
- "9000:9000"
command: minio server /minio_data --console-address ":9001"
volumes:
- "/opt/minio/minio-data/:/minio_data"
healthcheck:
test: ["CMD", "curl", "-f", "http://minio:9000/minio/health/live"]
interval: 5s
timeout: 5s
retries: 5
EOF
3.创建ak、sk
{"url":"http://10.84.3.128:9001/api/v1/service-account-credentials","accessKey":"starrocks","secretKey":"Sec@2024","api":"s3v4","path":"auto"}
FE
1. 下载并解压至工作目录
wget https://releases.starrocks.io/starrocks/StarRocks-3.3.0.tar.gz && tar zxvf StarRocks-3.3.0.tar.gz -C /opt/
2. 创建FE元数据目录
mkdir /apps/data/fe/meta -p
3. 配置FE文件
cat > /opt/StarRocks-3.3.0/fe/conf/fe.conf << 'EOF'
run_mode = shared_data
priority_networks = 192.168.8.0/24
cloud_native_meta_port = 6090
enable_load_volume_from_conf = true
cloud_native_storage_type = S3
meta_dir = /apps/data/fe/meta #FE元数据目录
aws_s3_path = starrocks #Bucket名称
aws_s3_endpoint = http://192.168.8.197:9000 #对象存储IP:PORT
aws_s3_region = starrocks
aws_s3_access_key = starrocks #AK
aws_s3_secret_key = Sec@2024 #SK
LOG_DIR = ${STARROCKS_HOME}/log
DATE = "$(date +%Y%m%d-%H%M%S)"
JAVA_OPTS="-Dlog4j2.formatMsgNoLookups=true -Xmx8192m -XX:+UseG1GC -Xlog:gc*:${LOG_DIR}/fe.gc.log.$DATE:time -XX:ErrorFile=${LOG_DIR}/hs_err_pid%p.log -Djava.security.policy=${STARROCKS_HOME}/conf/udf_security.policy"
sys_log_level = INFO
http_port = 8030
rpc_port = 9020
query_port = 9030
edit_log_port = 9010
mysql_service_nio_enabled = true
# 最小的Compaction score,低于该值的 Partition 不会发起Compaction任务
lake_compaction_score_selector_min_score = 10.0
# FE 上可同时发起的 Compaction Task 数量
# 默认值为-1,即FE会根据系统中 BE 数量自动计算
# 如果设置为0,则 FE 不会发起任何 Compaction 任务
lake_compaction_max_tasks = -1
# 控制show proc '/compactions' 显示的结果数量,默认为12
lake_compaction_history_size = 12
lake_compaction_fail_history_size = 12
EOF
4. 启动主节点FE
/opt/StarRocks-3.3.0/fe/bin/start_fe.sh --daemon
注意:从节点和主节点使用相同的配置,但从节点配置完成后先不要启动,只启动主节点;待在主节点上完成 FE 高可用配置(ALTER SYSTEM ADD FOLLOWER)后,再启动从节点。
主节点执行 1-4,从节点执行 1-3
5. 验证是否启动成功
[root@starrocks01 ~]# cat /opt/StarRocks-3.3.0/fe/log/fe.log | grep thrift
2024-07-15 09:14:16.241+08:00 INFO (UNKNOWN 192.168.8.198_9010_1721006047505(-1)|1) [FrontendThriftServer.start():65] thrift server started with port 9020.
CN
1. 创建CN数据存储目录
mkdir -p /apps/data/cn
2. 配置CN文件
cat > /opt/StarRocks-3.3.0/be/conf/cn.conf << EOF
sys_log_level = INFO
be_port = 9060
be_http_port = 8040
heartbeat_service_port = 9050
brpc_port = 8060
starlet_port = 9070
priority_networks = 192.168.8.0/24
storage_root_path = /apps/data/cn
cumulative_compaction_num_threads_per_disk = 4
base_compaction_num_threads_per_disk = 2
cumulative_compaction_check_interval_seconds = 2
# 控制 BE/CN 上同时执行 Compaction 任务的线程数,默认值为4
# 也即 BE 上可同时为多少个 Tablet进行 Compaction
compact_threads = 4
# BE 上 Compaction任务队列大小,控制可接收来自FE的最大Compaction 任务数
# 默认值为100
compact_thread_pool_queue_size = 100
# 单次 Compaction 任务最多合并的数据文件数量,默认为1000
# 在实践中我们建议将该值调整为100,这样,每个 Compaction Task 可以更快速地结束
# 且消耗更少的资源
max_cumulative_compaction_num_singleton_deltas=100
EOF
3. 启动CN
/opt/StarRocks-3.3.0/be/bin/start_cn.sh --daemon
4. 验证是否启动成功
[root@starrocks01 ~]# cat /opt/StarRocks-3.3.0/be/log/cn.INFO | grep heartbeat
I0715 09:22:45.866142 22959 thrift_server.cpp:383] heartbeat has started listening port on 9050
I0715 09:22:45.866148 22959 starrocks_be.cpp:282] CN start step 13: start heartbeat server successfully
其他节点安装CN可重复1-4步骤,CN三台可一起启动,无先后顺序
搭建集群
1. 使用MySQL客户端连接FE
#安装客户端
apt install mysql-client
#连接FE
mysql -uroot -P9030 -h 127.0.0.1
2. 配置 FE 高可用
mysql> ALTER SYSTEM ADD FOLLOWER "192.168.8.195:9010";
Query OK, 0 rows affected (0.03 sec)
mysql> ALTER SYSTEM ADD FOLLOWER "192.168.8.194:9010";
Query OK, 0 rows affected (0.02 sec)
3. 启动FE从节点
/opt/StarRocks-3.3.0/fe/bin/start_fe.sh --helper 192.168.8.198:9010 --daemon
注意:如果在配置高可用之前启动了从节点FE,配置完高可用后重启时会报错
192.168.8.195_9010_1721022086495 weren't! UNEXPECTED_STATE: Unexpected internal state, may have side effects.
解决办法:
# 清除从FE的meta目录
rm -rf /apps/data/fe/meta/*
#重新启动从节点FE
/opt/StarRocks-3.3.0/fe/bin/start_fe.sh --helper 192.168.8.198:9010 --daemon
4. 查看FE状态
mysql> SHOW FRONTENDS\G;
*************************** 1. row ***************************
Name: 192.168.8.198_9010_1721006047505
IP: 192.168.8.198
EditLogPort: 9010
HttpPort: 8030
QueryPort: 9030
RpcPort: 9020
Role: LEADER
ClusterId: 463766615
Join: true
Alive: true
ReplayedJournalId: 5042
LastHeartbeat: 2024-07-15 13:54:43
IsHelper: true
ErrMsg:
StartTime: 2024-07-15 09:22:27
Version: 3.3.0-19a3f66
*************************** 2. row ***************************
Name: 192.168.8.194_9010_1721021656623
IP: 192.168.8.194
EditLogPort: 9010
HttpPort: 8030
QueryPort: 9030
RpcPort: 9020
Role: FOLLOWER
ClusterId: 463766615
Join: true
Alive: true
ReplayedJournalId: 5040
LastHeartbeat: 2024-07-15 13:54:43
IsHelper: true
ErrMsg:
StartTime: 2024-07-15 13:53:49
Version: 3.3.0-19a3f66
*************************** 3. row ***************************
Name: 192.168.8.195_9010_1721021647617
IP: 192.168.8.195
EditLogPort: 9010
HttpPort: 8030
QueryPort: 9030
RpcPort: 9020
Role: FOLLOWER
ClusterId: 463766615
Join: true
Alive: true
ReplayedJournalId: 5040
LastHeartbeat: 2024-07-15 13:54:43
IsHelper: true
ErrMsg:
StartTime: 2024-07-15 13:51:06
Version: 3.3.0-19a3f66
3 rows in set (0.02 sec)
如果字段 Role 为 FOLLOWER,说明该 FE 节点有资格被选为 Leader FE 节点。
如果字段 Role 为 LEADER,说明该 FE 节点为 Leader FE 节点。
5. 添加CN节点
mysql> ALTER SYSTEM ADD COMPUTE NODE "192.168.8.198:9050";
Query OK, 0 rows affected (0.04 sec)
mysql> ALTER SYSTEM ADD COMPUTE NODE "192.168.8.195:9050";
Query OK, 0 rows affected (0.03 sec)
mysql> ALTER SYSTEM ADD COMPUTE NODE "192.168.8.194:9050";
Query OK, 0 rows affected (0.02 sec)
6. 查看CN状态
mysql> SHOW PROC '/compute_nodes'\G
*************************** 1. row ***************************
ComputeNodeId: 12600
IP: 192.168.8.194
HeartbeatPort: 9050
BePort: 9060
HttpPort: 8040
BrpcPort: 8060
LastStartTime: 2024-07-15 13:12:56
LastHeartbeat: 2024-07-15 14:09:38
Alive: true
SystemDecommissioned: false
ClusterDecommissioned: false
ErrMsg:
Version: 3.3.0-19a3f66
CpuCores: 40
NumRunningQueries: 0
MemUsedPct: 0.47 %
CpuUsedPct: 0.0 %
DataCacheMetrics: Status: Normal, DiskUsage: 0B/0B, MemUsage: 0B/0B
HasStoragePath: true
StarletPort: 9070
WorkerId: 9
WarehouseName: default_warehouse
TabletNum: 17
*************************** 2. row ***************************
ComputeNodeId: 12599
IP: 192.168.8.195
HeartbeatPort: 9050
BePort: 9060
HttpPort: 8040
BrpcPort: 8060
LastStartTime: 2024-07-15 13:09:46
LastHeartbeat: 2024-07-15 14:09:38
Alive: true
SystemDecommissioned: false
ClusterDecommissioned: false
ErrMsg:
Version: 3.3.0-19a3f66
CpuCores: 20
NumRunningQueries: 0
MemUsedPct: 0.97 %
CpuUsedPct: 0.0 %
DataCacheMetrics: Status: Normal, DiskUsage: 0B/0B, MemUsage: 0B/0B
HasStoragePath: true
StarletPort: 9070
WorkerId: 8
WarehouseName: default_warehouse
TabletNum: 21
*************************** 3. row ***************************
ComputeNodeId: 12521
IP: 192.168.8.198
HeartbeatPort: 9050
BePort: 9060
HttpPort: 8040
BrpcPort: 8060
LastStartTime: 2024-07-15 13:04:21
LastHeartbeat: 2024-07-15 14:09:38
Alive: true
SystemDecommissioned: false
ClusterDecommissioned: false
ErrMsg:
Version: 3.3.0-19a3f66
CpuCores: 40
NumRunningQueries: 0
MemUsedPct: 0.12 %
CpuUsedPct: 0.0 %
DataCacheMetrics: Status: Normal, DiskUsage: 0B/0B, MemUsage: 0B/0B
HasStoragePath: true
StarletPort: 9070
WorkerId: 1
WarehouseName: default_warehouse
TabletNum: 20
3 rows in set (0.00 sec)
7. 停止FE
/opt/StarRocks-3.3.0/fe/bin/stop_fe.sh --daemon
8. 停止CN
/opt/StarRocks-3.3.0/be/bin/stop_cn.sh --daemon
仅登录用户可评论,点击 登录