在 HDFS HA 已配置完成的基础上配置 ResourceManager (RM) HA: 修改 yarn-site.xml。(同样需要在两个节点上配置 RM)
set yarn.resourcemanager.ha.enabled=true # 启用 rm ha
set yarn.resourcemanager.cluster-id=<name> # rm 集群id
set yarn.resourcemanager.ha.rm-ids=<RM1 name>,<RM2 name> # 配置 rm ha 中 rm 的节点名称
set yarn.resourcemanager.ha.automatic-failover.enabled=true # 故障自动切换
set yarn.resourcemanager.recovery.enabled=true # 启用 RM 重启/切换后的状态自动恢复
# 以下各项需针对两个 RM 节点各配置一组 (将 <RM1 name> 换成 <RM2 name>, 主机名换成对应主机)
set yarn.resourcemanager.hostname.<RM1 name>=master # <RM1 name> 所在主机的主机名 (或 ip)
set yarn.resourcemanager.webapp.address.<RM1 name>=master:8088
set yarn.resourcemanager.address.<RM1 name>=master:8132
set yarn.resourcemanager.scheduler.address.<RM1 name>=master:8130
set yarn.resourcemanager.resource-tracker.address.<RM1 name>=master:8131
set yarn.resourcemanager.admin.address.<RM1 name>=master:8033
set yarn.resourcemanager.ha.admin.address.<RM1 name>=master:23142
set yarn.resourcemanager.ha.id=<RM1 name> # 注意: 两台 RM 节点上此项需分别配置为各自的 name, 不能相同
# 配置zk
set yarn.resourcemanager.zk-address=master:2181,slave2:2181,slave3:2181
set yarn.resourcemanager.zk-state-store.address=master:2181,slave2:2181,slave3:2181
set yarn.resourcemanager.store.class=org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore
set yarn.resourcemanager.ha.automatic-failover.zk-base-path=/yarn-leader-election # yarn 在zk 中保存数据的路径
配置完成之后, 分别在两台 RM 节点上执行 start-yarn.sh (需要先启动好 ZooKeeper 和 zkfc)。
访问 rm1:8088 和 rm2:8088, 两者的状态应分别为 active 和 standby; 通过 kill <rm1 pid> 来确认自动切换是否正常。