一、keepalived概述
- 概述
keepalived起初是为LVS设计的一款强大的辅助工具,用于保证LVS负载调度器的故障切换以及对web节点的健康状态检查,后续被应用到很多需要容错的场景下;keepalived自身基于VRRP协议(虚拟路由冗余协议,由IETF制定的开放标准协议,功能上类似思科私有的HSRP协议);
二、设计原理
- 设计模块
- core模块
为keepalived的核心组件,负责主进程的启动、维护及全局配置文件的加载和解析
- check模块
负责real server节点池中各节点的健康检测
- VRRP模块
在master与backup之间执行心跳检测
- core模块
- 热备实现过程
将多个主机以软件的方式组成一个热备组,通过共有的虚拟IP(VIP)地址对外提供服务;同一时刻,热备组中只有一台主机在工作,其他主机处于冗余状态;当当前在线的主机失效时,其他冗余的主机会自动接替虚拟IP地址,继续提供服务,以保证架构的稳定性;
三、keepalived实现双机热备
系统类型 | IP地址 | 主机名 | 所需软件 |
---|---|---|---|
centos7.8 | 192.168.100.105 | node1.linux.com | keepalived-1.2.13.tar.gz |
centos7.8 | 192.168.100.106 | node2.linux.com | keepalived-1.2.13.tar.gz |
- 安装node1节点上的httpd服务
[root@node1 ~]# yum -y install httpd
[root@node1 ~]# cat <<END >>/var/www/html/index.html
192.168.100.105
END
[root@node1 ~]# systemctl start httpd
[root@node1 ~]# systemctl enable httpd
Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
[root@node1 ~]# netstat -utpln |grep 80
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 1217/httpd
- 安装node2节点上的httpd服务
[root@node2 ~]# yum -y install httpd
[root@node2 ~]# cat <<END >>/var/www/html/index.html
192.168.100.106
END
[root@node2 ~]# systemctl start httpd
[root@node2 ~]# systemctl enable httpd
Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
[root@node2 ~]# netstat -utpln |grep 80
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 1217/httpd
- 在两台node节点上安装keepalived软件程序(两台安装步骤一致,在此只列出一台)
[root@node1 ~]# yum -y install kernel-devel openssl-devel popt-devel ##安装内核开发包,popt支持库等工具
[root@node1 ~]# tar -zxvf keepalived-1.2.13.tar.gz -C /usr/src/
[root@node1 ~]# cd /usr/src/keepalived-1.2.13/
[root@node1 keepalived-1.2.13]# ./configure --prefix=/usr/local/keepalived
[root@node1 keepalived-1.2.13]# make &&make install
[root@node1 keepalived-1.2.13]# cd
[root@node1 ~]# mkdir -p /etc/keepalived ##程序的主配置目录
[root@node1 ~]# cp /usr/local/keepalived/etc/keepalived/keepalived.conf /etc/keepalived/ ## 复制主配置文件
[root@node1 ~]# cp /usr/local/keepalived/etc/sysconfig/keepalived /etc/sysconfig/ ##复制启动时需要加载的配置文件
[root@node1 ~]# cp /usr/local/keepalived/etc/rc.d/init.d/keepalived /etc/init.d/ ##复制服务的控制脚本
[root@node1 ~]# cp /usr/local/keepalived/sbin/keepalived /usr/sbin/ ##复制keepalived的命令
[root@node1 ~]# chmod 755 /etc/init.d/keepalived ##为控制脚本指定权限
- 配置node1上master主节点
[root@node1 ~]# vi /etc/keepalived/keepalived.conf
global_defs {
router_id HA_TEST_R1 ##本服务器的名称,若环境中有多个keepalived时,此名称不能一致
}
vrrp_instance VI_1 { ##定义VRRP热备实例,每一个keep组都不同
state MASTER ##MASTER表示主服务器
interface eth0 ##承载VIP地址的物理接口
virtual_router_id 1 ##虚拟路由器的ID号,每一个keep组都不同
priority 100 ##优先级,数值越大优先级越高
advert_int 1 ##通告检查间隔秒数(心跳频率)
authentication { ##认证信息
auth_type PASS ##认证类型
auth_pass 123456 ##密码字串
}
virtual_ipaddress {
192.168.100.95 ##指定漂移地址(VIP)
}
}
virtual_server 192.168.100.95 80 { #vip配置
delay_loop 2 #每隔2秒检查一次real_server状态
#lb_algo wrr ##指定lvs的调度算法
#lb_kind DR ##lvs集群模式(如若不结合LVS请删掉两个配置)
persistence_timeout 60 ##会话保持时间
protocol TCP ##选择协议
real_server 192.168.100.105 80 { ##本机地址
weight 3 ##服务器的权重
notify_down /etc/keepalived/check.sh ##指定节点失效后,采用的脚本,notify_up表示节点正常后,采用的脚本
##健康检查方式一共有HTTP_GET|SSL_GET|TCP_CHECK|SMTP_CHECK|MISC_CHECK这些
TCP_CHECK {
connect_timeout 10 ##连接超时时间
nb_get_retry 3 ##重连次数
delay_before_retry 3 ##重连间隔时间
connect_port 80 ##健康检查端口
}
}
}
[root@node1 ~]# vi /etc/keepalived/check.sh
#!/bin/bash
/etc/init.d/keepalived stop
echo -e "$(ip a |grep eth0 |grep inet |awk '{print $2}'|awk -F'/' '{print $1}') (httpd) is down on $(date +%F-%T)" >>/root/check_httpd.log
:wq
[root@node1 ~]# chmod 777 /etc/keepalived/check.sh
[root@node1 ~]# /etc/init.d/keepalived start
Starting keepalived (via systemctl): [ 确定 ]
[root@node1 ~]# ip a |grep 192.168.100.95
inet 192.168.100.95/32 scope global eth0
- 配置node2上backup从节点
[root@node2 ~]# vi /etc/keepalived/keepalived.conf
global_defs {
router_id HA_TEST_R2 ##本服务器的名称
}
vrrp_instance VI_1 {
state BACKUP ##BACKUP表示从服务器
interface eth0
virtual_router_id 1
priority 99 ##优先级,低于主服务器
advert_int 1
authentication {
auth_type PASS
auth_pass 123456
}
virtual_ipaddress {
192.168.100.95
}
}
virtual_server 192.168.100.95 80 { ##vip配置
delay_loop 2 ##每隔2秒检查一次real_server状态
#lb_algo wrr
#lb_kind DR ##如若不结合LVS,需要去掉两个配置项
persistence_timeout 60 ##会话保持时间
protocol TCP
real_server 192.168.100.106 80 { ##本机地址
weight 3
notify_down /etc/keepalived/check.sh
TCP_CHECK {
connect_timeout 10 ##连接超时时间
nb_get_retry 3 ##重连次数
delay_before_retry 3 ##重连间隔时间
connect_port 80 ##健康检查端口
}
}
}
:wq
[root@node2 ~]# vi /etc/keepalived/check.sh
#!/bin/bash
/etc/init.d/keepalived stop
echo -e "$(ip a |grep eth0 |grep inet |awk '{print $2}'|awk -F'/' '{print $1}') (httpd) is down on $(date +%F-%T)" >>/root/check_httpd.log
:wq
[root@node2 ~]# chmod 777 /etc/keepalived/check.sh
[root@node2 ~]# /etc/init.d/keepalived start
[root@node1 ~]# systemctl stop httpd
[root@node1 ~]# /etc/init.d/keepalived status
● keepalived.service - SYSV: Start and stop Keepalived
Loaded: loaded (/etc/rc.d/init.d/keepalived; bad; vendor preset: disabled)
Active: inactive (dead)
Docs: man:systemd-sysv-generator(8)
8月 24 21:28:57 node1.linux.com Keepalived_healthcheckers[2860]: Netlink reflector reports IP 192.168.100.95 added
8月 24 21:29:02 node1.linux.com Keepalived_vrrp[2861]: VRRP_Instance(VI_1) Sending gratuitous ARPs on eth0 for 192.168.100.95
8月 24 21:36:44 node1.linux.com Keepalived_healthcheckers[2860]: TCP connection to [192.168.100.105]:80 failed !!!
8月 24 21:36:44 node1.linux.com Keepalived_healthcheckers[2860]: Removing service [192.168.100.105]:80 from VS [192.168.100.95]:80
8月 24 21:36:44 node1.linux.com Keepalived_healthcheckers[2860]: IPVS: Service not defined
8月 24 21:36:44 node1.linux.com Keepalived_healthcheckers[2860]: Executing [/etc/keepalived/check.sh] for service [192.168.100.105]:80 in VS [192.168.100.95]:80
8月 24 21:36:44 node1.linux.com Keepalived_healthcheckers[2860]: Lost quorum 1-0=1 > 0 for VS [192.168.100.95]:80
8月 24 21:36:44 node1.linux.com systemd[1]: Stopping SYSV: Start and stop Keepalived...
8月 24 21:36:44 node1.linux.com keepalived[2926]: Stopping keepalived: [ 确定 ]
8月 24 21:36:44 node1.linux.com systemd[1]: Stopped SYSV: Start and stop Keepalived.
将node1的httpd和keepalived服务重新启动
[root@node1 ~]# systemctl start httpd
[root@node1 ~]# /etc/init.d/keepalived start
Starting keepalived (via systemctl): [ 确定 ]
[root@node1 ~]# /etc/init.d/keepalived status
● keepalived.service - SYSV: Start and stop Keepalived
Loaded: loaded (/etc/rc.d/init.d/keepalived; bad; vendor preset: disabled)
Active: active (running) since 三 2022-08-24 21:38:10 CST; 4s ago
Docs: man:systemd-sysv-generator(8)
Process: 2979 ExecStart=/etc/rc.d/init.d/keepalived start (code=exited, status=0/SUCCESS)
Main PID: 2986 (keepalived)
CGroup: /system.slice/keepalived.service
├─2986 keepalived -D
├─2988 keepalived -D
└─2989 keepalived -D
8月 24 21:38:11 node1.linux.com Keepalived_healthcheckers[2988]: IPVS: Scheduler or persistence engine not found
8月 24 21:38:11 node1.linux.com Keepalived_healthcheckers[2988]: IPVS: No such process
8月 24 21:38:11 node1.linux.com Keepalived_healthcheckers[2988]: Using LinkWatch kernel netlink reflector...
8月 24 21:38:11 node1.linux.com Keepalived_healthcheckers[2988]: Activating healthchecker for service [192.168.100.105]:80
8月 24 21:38:11 node1.linux.com Keepalived_vrrp[2989]: VRRP_Instance(VI_1) Transition to MASTER STATE
8月 24 21:38:11 node1.linux.com Keepalived_vrrp[2989]: VRRP_Instance(VI_1) Received lower prio advert, forcing new election
8月 24 21:38:12 node1.linux.com Keepalived_vrrp[2989]: VRRP_Instance(VI_1) Entering MASTER STATE
8月 24 21:38:12 node1.linux.com Keepalived_vrrp[2989]: VRRP_Instance(VI_1) setting protocol VIPs.
8月 24 21:38:12 node1.linux.com Keepalived_vrrp[2989]: VRRP_Instance(VI_1) Sending gratuitous ARPs on eth0 for 192.168.100.95
8月 24 21:38:12 node1.linux.com Keepalived_healthcheckers[2988]: Netlink reflector reports IP 192.168.100.95 added
将node1节点的httpd和keepalived服务重新启动后,发现node1节点重新抢占了master主节点;如若不想让node1节点在恢复后自动抢占,可在主从节点配置文件中修改如下: