Building an 8-Node Highly Available Hadoop / HBase / Kafka / Flink / Spark / ZooKeeper Cluster on CentOS
Version Selection
Component | Version |
---|---|
JDK | 1.8.0_261 |
Zookeeper | 3.5.8 |
Hadoop | 3.2.1 |
HBase | 2.2.5 |
Spark | 3.0.1-hadoop_3.2 |
Kafka | 2.6.0-scala_2.13 |
Flink | 1.11.2-scala_2.12 |
flink-shaded-hadoop-3-uber | 3.1.1.7.1.1.0-565-9.0 |
Base Environment Setup
Server Planning
A total of 8 distributed storage servers.
A highly available ZK ensemble needs an odd number of nodes, so a 5-node ZK HA cluster is deployed here.
The Hadoop HA cluster uses all 8 nodes:
NameNode: one active (hadoop001), three standby (hadoop002, hadoop003, hadoop004)
ResourceManager: one active (hadoop002), three standby (hadoop003, hadoop004, hadoop005)
JournalNode: 7 nodes (hadoop001-007)
hadoop001: zookeeper, hadoop, active NameNode, DFSZKFailoverController, DataNode, JournalNode, NodeManager
hadoop002: zookeeper, hadoop, standby NameNode, DFSZKFailoverController, active ResourceManager, DataNode, JournalNode, NodeManager
hadoop003: zookeeper, hadoop, standby NameNode, standby ResourceManager, DataNode, JournalNode, NodeManager
hadoop004: zookeeper, hadoop, standby NameNode, standby ResourceManager, DataNode, JournalNode, NodeManager
hadoop005: zookeeper, hadoop, standby ResourceManager, DataNode, JournalNode, NodeManager
hadoop006: hadoop, DataNode, JournalNode, NodeManager
hadoop007: hadoop, DataNode, JournalNode, NodeManager
hadoop008: hadoop, DataNode, NodeManager
Firewall and SELinux Configuration (all nodes)
# sed -i -e "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
# systemctl stop firewalld
# systemctl disable firewalld
# setenforce 0
Common Tools Installation (all nodes)
# yum -y install wget tree vim unzip zlib zip net-tools lsof telnet dos2unix ntfs-3g pcre gcc-c++ openssl pcre-devel yum-utils
hosts File Preparation (all nodes)
On all cluster nodes:
# echo '
192.168.2.11 hadoop001
192.168.2.12 hadoop002
192.168.2.13 hadoop003
192.168.2.14 hadoop004
192.168.2.15 hadoop005
192.168.2.16 hadoop006
192.168.2.17 hadoop007
192.168.2.18 hadoop008' >> /etc/hosts
Passwordless SSH Login Configuration
Generate a key pair on the first node, i.e. the primary node (hadoop001)
# ssh-keygen -t rsa
Append hadoop001's public key to ~/.ssh/authorized_keys on the local machine and on the remote machines (hadoop001)
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop001
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop002
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop003
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop004
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop005
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop006
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop007
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop008
Verify passwordless login (hadoop001)
# ssh hadoop001
# ssh hadoop002
# ssh hadoop003
# ssh hadoop004
# ssh hadoop005
# ssh hadoop006
# ssh hadoop007
# ssh hadoop008
yum Optimization (hadoop001)
# cd /etc/yum.repos.d
# mkdir bak && mv *.repo bak/
# curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-7.repo
# curl -o /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
Distribute to the other nodes
# scp -r /etc/yum.repos.d root@hadoop002:/etc/
# scp -r /etc/yum.repos.d root@hadoop003:/etc/
# scp -r /etc/yum.repos.d root@hadoop004:/etc/
# scp -r /etc/yum.repos.d root@hadoop005:/etc/
# scp -r /etc/yum.repos.d root@hadoop006:/etc/
# scp -r /etc/yum.repos.d root@hadoop007:/etc/
# scp -r /etc/yum.repos.d root@hadoop008:/etc/
Time Synchronization
ntpdate (all nodes)
# yum -y install ntpdate
# ntpdate ntp1.aliyun.com
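A single ntpdate run drifts over time; optionally schedule a periodic resync via cron. A minimal sketch (the half-hourly interval and the Aliyun NTP server are arbitrary choices):
# (crontab -l 2>/dev/null; echo "*/30 * * * * /usr/sbin/ntpdate ntp1.aliyun.com >/dev/null 2>&1") | crontab -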
Java Runtime Environment Deployment (hadoop001)
Extract and install (hadoop001)
# mkdir /usr/java
# tar zxvf jdk-8u261-linux-x64.tar.gz -C /usr/java
# echo '# Java Env
export JAVA_HOME=/usr/java/jdk1.8.0_261
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin' >> /etc/profile
# source /etc/profile
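Before distributing, a quick sanity check that the JDK is active in the current shell:
# java -version
# echo $JAVA_HOME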
Distribute to the other nodes
# scp -r /usr/java root@hadoop002:/usr/
# scp -r /usr/java root@hadoop003:/usr/
# scp -r /usr/java root@hadoop004:/usr/
# scp -r /usr/java root@hadoop005:/usr/
# scp -r /usr/java root@hadoop006:/usr/
# scp -r /usr/java root@hadoop007:/usr/
# scp -r /usr/java root@hadoop008:/usr/
# scp /etc/profile root@hadoop002:/etc/profile
# scp /etc/profile root@hadoop003:/etc/profile
# scp /etc/profile root@hadoop004:/etc/profile
# scp /etc/profile root@hadoop005:/etc/profile
# scp /etc/profile root@hadoop006:/etc/profile
# scp /etc/profile root@hadoop007:/etc/profile
# scp /etc/profile root@hadoop008:/etc/profile
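The per-host scp commands above (and the similar distribution steps later in this guide) can also be written as a loop; a minimal sketch assuming the hadoop00N naming used here:
# for i in $(seq 2 8); do scp -r /usr/java root@hadoop00${i}:/usr/; scp /etc/profile root@hadoop00${i}:/etc/profile; done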
ZooKeeper HA Cluster Setup
Download and install (/usr/local) (hadoop001)
# cd /usr/local
# wget https://mirrors.ustc.edu.cn/apache/zookeeper/stable/apache-zookeeper-3.5.8-bin.tar.gz
# tar zxvf apache-zookeeper-3.5.8-bin.tar.gz
# mv apache-zookeeper-3.5.8-bin zookeeper
Edit the configuration on the primary node (hadoop001)
# cp /usr/local/zookeeper/conf/zoo_sample.cfg /usr/local/zookeeper/conf/zoo.cfg
/usr/local/zookeeper/conf/zoo.cfg
maxClientCnxns=0
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/usr/local/zookeeper/data
dataLogDir=/usr/local/zookeeper/log
clientPort=2181
# In server.N, N is a numeric ID identifying this server node; the same number must be written into the myid file under dataDir
# Specify the inter-server communication port and the leader-election port
server.1=hadoop001:2888:3888
server.2=hadoop002:2888:3888
server.3=hadoop003:2888:3888
server.4=hadoop004:2888:3888
server.5=hadoop005:2888:3888
Create the data and log directories
# mkdir -p /usr/local/zookeeper/{data,log}
Distribute to the other ZK nodes
# scp -r /usr/local/zookeeper root@hadoop002:/usr/local/
# scp -r /usr/local/zookeeper root@hadoop003:/usr/local/
# scp -r /usr/local/zookeeper root@hadoop004:/usr/local/
# scp -r /usr/local/zookeeper root@hadoop005:/usr/local/
Write the node IDs (run the matching command on each ZK node)
## run on hadoop001
# echo "1" > /usr/local/zookeeper/data/myid
## run on hadoop002
# echo "2" > /usr/local/zookeeper/data/myid
## run on hadoop003
# echo "3" > /usr/local/zookeeper/data/myid
## run on hadoop004
# echo "4" > /usr/local/zookeeper/data/myid
## run on hadoop005
# echo "5" > /usr/local/zookeeper/data/myid
Configure environment variables
Required on all ZK nodes
# echo '
export ZOOKEEPER_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin' >> /etc/profile
# source /etc/profile
Start the cluster
Run the following command on each of the 5 ZK hosts to start the service:
# zkServer.sh start
Check the status on each ZK node with:
[root@hadoop001 ~]# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower
[root@hadoop001 ~]#
[root@hadoop002 ~]# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower
[root@hadoop002 ~]#
[root@hadoop003 ~]# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: leader
[root@hadoop003 ~]#
[root@hadoop004 ~]# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower
[root@hadoop004 ~]#
[root@hadoop005 ~]# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower
[root@hadoop005 ~]#
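Besides zkServer.sh status, client connectivity can be spot-checked with zkCli.sh, for example by listing the root znode (any ZK node can be queried):
# zkCli.sh -server hadoop001:2181 ls /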
Hadoop HA Cluster Setup
Hadoop high availability (HA) covers HDFS HA and YARN HA. The two are implemented in a similar way, but the HDFS NameNode has much stricter requirements on data storage and consistency than the YARN ResourceManager, so its implementation is considerably more complex.
The HDFS HA architecture consists of the following components:
Active NameNode and Standby NameNode: the NameNodes back each other up; one is in the Active state (the primary NameNode) and the rest are in the Standby state, and only the active NameNode serves read and write requests.
ZKFailoverController (ZKFC): runs as a separate process and coordinates NameNode failover. It monitors the health of its NameNode and, when the active NameNode fails, uses ZooKeeper to perform automatic leader election and failover. NameNode also supports manual failover that does not depend on ZooKeeper.
ZooKeeper ensemble: provides leader election for the failover controllers.
Shared storage system: the most critical part of NameNode HA. It holds the HDFS metadata generated by the NameNode at runtime, and the active and standby NameNodes keep their metadata in sync through it. During a failover, the new active NameNode only continues serving requests after confirming that its metadata is fully synchronized.
DataNodes: besides the metadata shared via the shared storage system, the active and standby NameNodes also need the mapping between HDFS blocks and DataNodes, so DataNodes report block locations to both the active and standby NameNodes.
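For reference, once the HA cluster below is running, the role of each NameNode can be queried with hdfs haadmin, using the nn1-nn4 IDs defined later in hdfs-site.xml:
## state (active / standby) of a single NameNode
# hdfs haadmin -getServiceState nn1
## state of all NameNodes at once
# hdfs haadmin -getAllServiceState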
Hadoop Master Node Passwordless SSH Configuration
Generate a key pair on hadoop002 (the primary ResourceManager node)
# ssh-keygen -t rsa
Append hadoop002's public key to ~/.ssh/authorized_keys on the local machine and on the remote machines (hadoop002)
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop001
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop002
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop003
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop004
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop005
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop006
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop007
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop008
Verify passwordless login (hadoop002)
# ssh hadoop001
# ssh hadoop002
# ssh hadoop003
# ssh hadoop004
# ssh hadoop005
# ssh hadoop006
# ssh hadoop007
# ssh hadoop008
Extract and install (hadoop001)
# tar zxvf hadoop-3.2.1.tar.gz -C /usr/local
# cd /usr/local && mv hadoop-3.2.1 hadoop
Configure environment variables (all Hadoop nodes)
# echo '
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin' >> /etc/profile
Configure Hadoop (hadoop001)
${HADOOP_HOME}/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_261
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_ZKFC_USER=root
export HDFS_JOURNALNODE_USER=root
${HADOOP_HOME}/etc/hadoop/yarn-env.sh
export YARN_RESOURCEMANAGER_USER=root
export HADOOP_SECURE_DN_USER=yarn
export YARN_NODEMANAGER_USER=root
${HADOOP_HOME}/etc/hadoop/core-site.xml
<configuration>
<property>
<!-- URI of the default HDFS filesystem; with HA this is the logical nameservice name, without a port -->
<name>fs.defaultFS</name>
<value>hdfs://hdfscluster</value>
</property>
<property>
<!-- Directory where the Hadoop cluster stores temporary files -->
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>
<!-- Static web user; without this setting the web UI reports errors -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<!-- Addresses of the ZooKeeper ensemble -->
<name>ha.zookeeper.quorum</name>
<value>hadoop001:2181,hadoop002:2181,hadoop003:2181,hadoop004:2181,hadoop005:2181</value>
</property>
</configuration>
Create the directory where the Hadoop cluster stores temporary files
# mkdir -p /home/hadoop/tmp
${HADOOP_HOME}/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<!-- Number of HDFS replicas -->
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<!-- Where the NameNode stores its data (metadata); multiple comma-separated directories can be given for fault tolerance -->
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/namenode/data</value>
</property>
<property>
<!-- Where the DataNode stores its data (blocks) -->
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/datanode/data</value>
</property>
<property>
<!-- Logical name of the cluster service -->
<name>dfs.nameservices</name>
<value>hdfscluster</value>
</property>
<property>
<!-- List of NameNode IDs -->
<name>dfs.ha.namenodes.hdfscluster</name>
<value>nn1,nn2,nn3,nn4</value>
</property>
<property>
<!-- RPC address of nn1 -->
<name>dfs.namenode.rpc-address.hdfscluster.nn1</name>
<value>hadoop001:8020</value>
</property>
<property>
<!-- RPC address of nn2 -->
<name>dfs.namenode.rpc-address.hdfscluster.nn2</name>
<value>hadoop002:8020</value>
</property>
<property>
<!-- RPC address of nn3 -->
<name>dfs.namenode.rpc-address.hdfscluster.nn3</name>
<value>hadoop003:8020</value>
</property>
<property>
<!-- RPC address of nn4 -->
<name>dfs.namenode.rpc-address.hdfscluster.nn4</name>
<value>hadoop004:8020</value>
</property>
<property>
<!-- HTTP address of nn1 -->
<name>dfs.namenode.http-address.hdfscluster.nn1</name>
<value>hadoop001:9870</value>
</property>
<property>
<!-- HTTP address of nn2 -->
<name>dfs.namenode.http-address.hdfscluster.nn2</name>
<value>hadoop002:9870</value>
</property>
<property>
<!-- HTTP address of nn3 -->
<name>dfs.namenode.http-address.hdfscluster.nn3</name>
<value>hadoop003:9870</value>
</property>
<property>
<!-- HTTP address of nn4 -->
<name>dfs.namenode.http-address.hdfscluster.nn4</name>
<value>hadoop004:9870</value>
</property>
<property>
<!-- Shared storage directory for NameNode metadata on the JournalNodes -->
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop001:8485;hadoop002:8485;hadoop003:8485;hadoop004:8485;hadoop005:8485;hadoop006:8485;hadoop007:8485/hdfscluster</value>
</property>
<property>
<!-- Storage directory for journal edit files -->
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/journalnode/data</value>
</property>
<property>
<!-- Fencing method, ensuring only one NameNode is active at any given time -->
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<!-- sshfence requires passwordless SSH -->
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<!-- Client failover proxy class used to determine the currently active NameNode -->
<name>dfs.client.failover.proxy.provider.hdfscluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<!-- Enable automatic failover -->
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
</configuration>
Create and distribute the journal edit files directory, the NameNode data (metadata) directory, and the DataNode data (block) directory
# mkdir -p /home/hadoop/journalnode/data
# mkdir -p /home/hadoop/namenode/data
# mkdir -p /home/hadoop/datanode/data
#
# scp -r /home/hadoop root@hadoop002:/home/
# scp -r /home/hadoop root@hadoop003:/home/
# scp -r /home/hadoop root@hadoop004:/home/
# scp -r /home/hadoop root@hadoop005:/home/
# scp -r /home/hadoop root@hadoop006:/home/
# scp -r /home/hadoop root@hadoop007:/home/
# scp -r /home/hadoop root@hadoop008:/home/
${HADOOP_HOME}/etc/hadoop/yarn-site.xml
<configuration>
<property>
<!-- Auxiliary service run on the NodeManager; must be set to mapreduce_shuffle to run MapReduce jobs on YARN -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<!-- Whether to enable log aggregation (optional) -->
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<!-- Retention time for aggregated logs, in seconds (optional) -->
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
<property>
<!-- Enable ResourceManager HA -->
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<!-- ResourceManager cluster ID -->
<name>yarn.resourcemanager.cluster-id</name>
<value>yarncluster</value>
</property>
<property>
<!-- List of logical ResourceManager IDs -->
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2,rm3,rm4</value>
</property>
<property>
<!-- Service address of RM1 -->
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop002</value>
</property>
<property>
<!-- Service address of RM2 -->
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop003</value>
</property>
<property>
<!-- Service address of RM3 -->
<name>yarn.resourcemanager.hostname.rm3</name>
<value>hadoop004</value>
</property>
<property>
<!-- Service address of RM4 -->
<name>yarn.resourcemanager.hostname.rm4</name>
<value>hadoop005</value>
</property>
<property>
<!-- Web UI address of RM1 -->
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop002:8088</value>
</property>
<property>
<!-- Web UI address of RM2 -->
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop003:8088</value>
</property>
<property>
<!-- Web UI address of RM3 -->
<name>yarn.resourcemanager.webapp.address.rm3</name>
<value>hadoop004:8088</value>
</property>
<property>
<!-- Web UI address of RM4 -->
<name>yarn.resourcemanager.webapp.address.rm4</name>
<value>hadoop005:8088</value>
</property>
<property>
<!-- Addresses of the ZooKeeper ensemble -->
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop001:2181,hadoop002:2181,hadoop003:2181,hadoop004:2181,hadoop005:2181</value>
</property>
<property>
<!-- Enable automatic recovery -->
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<!-- Class used for persistent state storage -->
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
</configuration>
${HADOOP_HOME}/etc/hadoop/mapred-site.xml
<configuration>
<property>
<!-- Run MapReduce jobs on YARN -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
${HADOOP_HOME}/etc/hadoop/workers
hadoop001
hadoop002
hadoop003
hadoop004
hadoop005
hadoop006
hadoop007
hadoop008
Distribute Hadoop to the other nodes
# scp -r /usr/local/hadoop hadoop002:/usr/local/
# scp -r /usr/local/hadoop hadoop003:/usr/local/
# scp -r /usr/local/hadoop hadoop004:/usr/local/
# scp -r /usr/local/hadoop hadoop005:/usr/local/
# scp -r /usr/local/hadoop hadoop006:/usr/local/
# scp -r /usr/local/hadoop hadoop007:/usr/local/
# scp -r /usr/local/hadoop hadoop008:/usr/local/
Start the cluster
Start the JournalNodes (all JournalNode hosts)
# hdfs --daemon start journalnode
Format the NameNode (hadoop001, the primary NameNode)
# hdfs namenode -format
Start the freshly formatted NameNode (hadoop001, the primary NameNode)
# hdfs --daemon start namenode
Sync the NameNode metadata (the other NameNode hosts: hadoop002, hadoop003, hadoop004)
# hdfs namenode -bootstrapStandby
Initialize the HA state in ZooKeeper (any NameNode host, here hadoop001)
# hdfs zkfc -formatZK
Start HDFS (hadoop001; this starts the NameNode and DataNode services on all nodes)
# start-dfs.sh
If a DataNode fails to start, the usual cause is a clusterID mismatch with the NameNode. To fix it, find the NameNode's clusterID in the error message in $HADOOP_HOME/logs/hadoop-root-datanode-hadoop001.log, replace the clusterID field in /home/hadoop/datanode/data/current/VERSION with that value, distribute the file, and restart DFS.
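A quick way to compare the two IDs, using the data directories from this guide (run on the NameNode host and on the affected DataNode):
## clusterID recorded by the NameNode
# grep clusterID /home/hadoop/namenode/data/current/VERSION
## clusterID recorded by the DataNode -- must match the NameNode's value
# grep clusterID /home/hadoop/datanode/data/current/VERSION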
Start the YARN cluster (hadoop002; this starts the ResourceManager and NodeManager services on all nodes)
# start-yarn.sh
If a ResourceManager does not come up on some node, start it there manually with:
# yarn --daemon start resourcemanager
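The ResourceManager HA state can then be checked with yarn rmadmin, using the rm1-rm4 IDs from yarn-site.xml:
# yarn rmadmin -getServiceState rm1
# yarn rmadmin -getServiceState rm2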
HBase HA Cluster Setup
HBase HA cluster planning
hadoop001: Region Server
hadoop002: Region Server
hadoop003: Region Server
hadoop004: Region Server
hadoop005: Region Server
hadoop006: Region Server, standby Master
hadoop007: Region Server, standby Master
hadoop008: Region Server, active Master
HBase Master Node Passwordless SSH Configuration
Generate a key pair on the primary HMaster node (hadoop008)
# ssh-keygen -t rsa
Append hadoop008's public key to ~/.ssh/authorized_keys on the local machine and on the remote machines (hadoop008)
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop001
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop002
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop003
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop004
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop005
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop006
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop007
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop008
Verify passwordless login (hadoop008)
# ssh hadoop001
# ssh hadoop002
# ssh hadoop003
# ssh hadoop004
# ssh hadoop005
# ssh hadoop006
# ssh hadoop007
# ssh hadoop008
Extract and install (hadoop008)
# tar zxvf hbase-2.2.5-bin.tar.gz -C /usr/local/
# cd /usr/local
# mv hbase-2.2.5 hbase
Configure environment variables (all HBase nodes)
# echo '
export HBASE_HOME=/usr/local/hbase
export PATH=$PATH:$HBASE_HOME/bin' >> /etc/profile
# source /etc/profile
Configure HBase (hadoop008)
$HBASE_HOME/conf/hbase-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_261
export HBASE_MANAGES_ZK=false
export HBASE_CLASSPATH=/usr/local/hadoop/etc/hadoop
$HBASE_HOME/conf/hbase-site.xml
<configuration>
<!--<property>-->
<!-- HBase master node -->
<!-- <name>hbase.master</name>-->
<!-- <value>hadoop008:60000</value>-->
<!--</property>-->
<property>
<!-- Whether to run in distributed (cluster) mode: true -->
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/usr/local/hbase/tmp</value>
</property>
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
<property>
<!-- ZooKeeper quorum addresses -->
<name>hbase.zookeeper.quorum</name>
<value>hadoop001:2181,hadoop002:2181,hadoop003:2181,hadoop004:2181,hadoop005:2181</value>
</property>
<property>
<!-- HBase storage location on HDFS -->
<name>hbase.rootdir</name>
<value>hdfs://hdfscluster/hbase</value>
</property>
<property>
<!-- HBase master port -->
<name>hbase.master.port</name>
<value>16000</value>
</property>
<property>
<!-- HBase RegionServer port -->
<name>hbase.regionserver.port</name>
<value>16020</value>
</property>
<property>
<!-- HBase RegionServer web UI port -->
<name>hbase.regionserver.info.port</name>
<value>16030</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/usr/local/zookeeper/hbase/data</value>
</property>
</configuration>
Create the HBase temporary directory and the ZooKeeper data directory used by HBase
# mkdir -p /usr/local/hbase/tmp
# mkdir -p /usr/local/zookeeper/hbase/data
$HBASE_HOME/conf/regionservers
hadoop001
hadoop002
hadoop003
hadoop004
hadoop005
hadoop006
hadoop007
hadoop008
$HBASE_HOME/conf/backup-masters
# touch /usr/local/hbase/conf/backup-masters
# echo 'hadoop006
hadoop007' > /usr/local/hbase/conf/backup-masters
Distribute HBase (hadoop008)
# scp -r /usr/local/hbase hadoop001:/usr/local/
# scp -r /usr/local/hbase hadoop002:/usr/local/
# scp -r /usr/local/hbase hadoop003:/usr/local/
# scp -r /usr/local/hbase hadoop004:/usr/local/
# scp -r /usr/local/hbase hadoop005:/usr/local/
# scp -r /usr/local/hbase hadoop006:/usr/local/
# scp -r /usr/local/hbase hadoop007:/usr/local/
# scp -r /usr/local/zookeeper/hbase hadoop001:/usr/local/zookeeper/
# scp -r /usr/local/zookeeper/hbase hadoop002:/usr/local/zookeeper/
# scp -r /usr/local/zookeeper/hbase hadoop003:/usr/local/zookeeper/
# scp -r /usr/local/zookeeper/hbase hadoop004:/usr/local/zookeeper/
# scp -r /usr/local/zookeeper/hbase hadoop005:/usr/local/zookeeper/
# scp -r /usr/local/zookeeper/hbase hadoop006:/usr/local/zookeeper/
# scp -r /usr/local/zookeeper/hbase hadoop007:/usr/local/zookeeper/
Start the HBase cluster (primary HMaster, hadoop008)
Running this on the primary node automatically starts all RegionServer and HMaster services
# start-hbase.sh
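A quick post-start health check; the status command in the HBase shell reports the active master, the backup masters, and the number of live RegionServers:
# echo "status" | hbase shell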
Kafka HA Cluster Deployment
Kafka HA cluster planning
hadoop001: zookeeper, kafka, kafka-manager
hadoop002: zookeeper, kafka
hadoop003: zookeeper, kafka
hadoop004: zookeeper, kafka
hadoop005: zookeeper, kafka
hadoop006:
hadoop007:
hadoop008:
Kafka Primary Node Passwordless SSH Configuration
Generate a key pair on the Kafka primary node (hadoop001)
# ssh-keygen -t rsa
Append hadoop001's public key to ~/.ssh/authorized_keys on the local machine and on the remote machines (hadoop001)
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop001
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop002
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop003
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop004
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop005
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop006
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop007
# ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop008
Verify passwordless login (hadoop001)
# ssh hadoop001
# ssh hadoop002
# ssh hadoop003
# ssh hadoop004
# ssh hadoop005
# ssh hadoop006
# ssh hadoop007
# ssh hadoop008
Extract and install (hadoop001)
# tar zxvf kafka_2.13-2.6.0.tgz -C /usr/local/
# cd /usr/local
# mv kafka_2.13-2.6.0 kafka
Configure environment variables (all Kafka nodes)
# echo '
export KAFKA_HOME=/usr/local/kafka
export PATH=$KAFKA_HOME/bin:$PATH' >> /etc/profile
# source /etc/profile
Configure Kafka (hadoop001)
$KAFKA_HOME/config/server.properties
# Unique ID of this broker within the cluster (kept consistent with the node's ZK myid here)
broker.id=1
# Listener addresses
listeners=PLAINTEXT://hadoop001:9092
advertised.listeners=PLAINTEXT://hadoop001:9092
# Data (log segment) storage location
log.dirs=/usr/local/kafka/logs
# ZooKeeper connection string
zookeeper.connect=hadoop001:2181,hadoop002:2181,hadoop003:2181,hadoop004:2181,hadoop005:2181
Create the log (data) directory
# mkdir /usr/local/kafka/logs
Distribute Kafka to the other broker nodes (hadoop001)
# scp -r kafka hadoop002:/usr/local/
# scp -r kafka hadoop003:/usr/local/
# scp -r kafka hadoop004:/usr/local/
# scp -r kafka hadoop005:/usr/local/
Adjust the Kafka configuration on the distributed nodes, as listed below – $KAFKA_HOME/config/server.properties (hadoop002-005); a scripted alternative is sketched after the listings.
hadoop002
broker.id=2
listeners=PLAINTEXT://hadoop002:9092
advertised.listeners=PLAINTEXT://hadoop002:9092
hadoop003
broker.id=3
listeners=PLAINTEXT://hadoop003:9092
advertised.listeners=PLAINTEXT://hadoop003:9092
hadoop004
broker.id=4
listeners=PLAINTEXT://hadoop004:9092
advertised.listeners=PLAINTEXT://hadoop004:9092
hadoop005
broker.id=5
listeners=PLAINTEXT://hadoop005:9092
advertised.listeners=PLAINTEXT://hadoop005:9092
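Instead of editing each broker by hand, the same changes can be applied from hadoop001 over ssh with sed; a sketch, assuming server.properties was distributed unchanged from hadoop001:
# for i in 2 3 4 5; do ssh root@hadoop00${i} "sed -i -e 's/^broker.id=.*/broker.id=${i}/' -e 's/hadoop001:9092/hadoop00${i}:9092/g' /usr/local/kafka/config/server.properties"; done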
Start the Kafka cluster (all Kafka nodes)
# kafka-server-start.sh -daemon /usr/local/kafka/config/server.properties
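To verify the brokers, create a replicated test topic and exchange a few messages (the topic name test is arbitrary; run the producer and consumer in separate terminals):
# kafka-topics.sh --create --bootstrap-server hadoop001:9092 --replication-factor 3 --partitions 3 --topic test
# kafka-topics.sh --describe --bootstrap-server hadoop001:9092 --topic test
# kafka-console-producer.sh --bootstrap-server hadoop001:9092 --topic test
# kafka-console-consumer.sh --bootstrap-server hadoop002:9092 --topic test --from-beginning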
Flink HA Cluster Deployment
Flink HA cluster planning
hadoop001: zookeeper, active JobManager
hadoop002: zookeeper, standby JobManager
hadoop003: zookeeper, standby JobManager
hadoop004: zookeeper, standby JobManager
hadoop005: zookeeper, TaskManager
hadoop006: TaskManager
hadoop007: TaskManager
hadoop008: TaskManager
Extract and install (hadoop001)
# tar zxvf flink-1.11.2-bin-scala_2.12.tgz -C /usr/local/
# cd /usr/local
# mv flink-1.11.2 flink
Configure environment variables (all Flink nodes)
# echo '
export FLINK_HOME=/usr/local/flink
export PATH=$FLINK_HOME/bin:$PATH' >> /etc/profile
# source /etc/profile
Configure Flink (hadoop001)
${FLINK_HOME}/conf/flink-conf.yaml
high-availability: zookeeper
high-availability.zookeeper.quorum: hadoop001:2181,hadoop002:2181,hadoop003:2181,hadoop004:2181,hadoop005:2181
high-availability.zookeeper.path.root: /flink
high-availability.cluster-id: /flinkcluster
high-availability.storageDir: hdfs:///flink/recovery
# high-availability.storageDir: hdfs://hdfscluster:8020/flink/recovery
${FLINK_HOME}/conf/masters
hadoop001:8081
hadoop002:8081
hadoop003:8081
hadoop004:8081
${FLINK_HOME}/conf/workers
hadoop005
hadoop006
hadoop007
hadoop008
Distribute Flink (hadoop001)
# scp -r /usr/local/flink hadoop002:/usr/local/
# scp -r /usr/local/flink hadoop003:/usr/local/
# scp -r /usr/local/flink hadoop004:/usr/local/
# scp -r /usr/local/flink hadoop005:/usr/local/
# scp -r /usr/local/flink hadoop006:/usr/local/
# scp -r /usr/local/flink hadoop007:/usr/local/
# scp -r /usr/local/flink hadoop008:/usr/local/
Start the Flink cluster
Start the ZooKeeper cluster (all ZK nodes)
# zkServer.sh start
Start the Hadoop cluster (hadoop001)
## start HDFS HA
# start-dfs.sh
## start YARN HA
# start-yarn.sh
Start the Flink cluster (hadoop001)
# /usr/local/flink/bin/start-cluster.sh
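The distribution ships example jobs that serve as a smoke test; for instance, submitting the streaming WordCount from hadoop001:
# flink run /usr/local/flink/examples/streaming/WordCount.jar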
Common Flink startup error
Caused by: org.apache.flink.core.fs.UnsupportedFileSystemSchemeException: Hadoop is not in the classpath/dependencies.
This happens because the Hadoop dependencies cannot be found on the classpath. Download the flink-shaded-hadoop-3-uber jar matching your Hadoop version from the address below, copy it into ${FLINK_HOME}/lib on every Flink node, and restart Flink. Testing shows that Hadoop 3.2.1 works with the 3.1.1 shaded dependency.
https://repository.cloudera.com/artifactory/libs-release-local/org/apache/flink/flink-shaded-hadoop-3-uber/
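A sketch of fetching and distributing the jar; the exact file name and path below are inferred from the version table at the top and the standard Maven repository layout, so verify the real URL in a browser first:
# cd /usr/local/flink/lib
# wget https://repository.cloudera.com/artifactory/libs-release-local/org/apache/flink/flink-shaded-hadoop-3-uber/3.1.1.7.1.1.0-565-9.0/flink-shaded-hadoop-3-uber-3.1.1.7.1.1.0-565-9.0.jar
# for i in $(seq 2 8); do scp flink-shaded-hadoop-3-uber-*.jar root@hadoop00${i}:/usr/local/flink/lib/; done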
Spark HA Cluster Deployment
Spark HA cluster planning
hadoop001: zookeeper, active Spark master, Spark worker
hadoop002: zookeeper, standby Spark master, Spark worker
hadoop003: zookeeper, standby Spark master, Spark worker
hadoop004: zookeeper, standby Spark master, Spark worker
hadoop005: zookeeper, Spark worker
hadoop006: spark worker
hadoop007: spark worker
hadoop008: spark worker
Extract and install (hadoop001)
# tar zxvf spark-3.0.1-bin-hadoop3.2.tgz -C /usr/local/
# cd /usr/local
# mv spark-3.0.1-bin-hadoop3.2 spark
Configure environment variables (all Spark nodes)
# echo '
export SPARK_HOME=/usr/local/spark
export PATH=$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH' >> /etc/profile
# source /etc/profile
Configure Spark (hadoop001)
${SPARK_HOME}/conf/spark-env.sh
JAVA_HOME=/usr/java/jdk1.8.0_261
HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=hadoop001:2181,hadoop002:2181,hadoop003:2181,hadoop004:2181,hadoop005:2181 -Dspark.deploy.zookeeper.dir=/spark"
${SPARK_HOME}/conf/slaves - list of all Worker nodes
# cp /usr/local/spark/conf/slaves.template /usr/local/spark/conf/slaves
# echo 'hadoop001
hadoop002
hadoop003
hadoop004
hadoop005
hadoop006
hadoop007
hadoop008' > /usr/local/spark/conf/slaves
Distribute Spark
# scp -r /usr/local/spark hadoop002:/usr/local/
# scp -r /usr/local/spark hadoop003:/usr/local/
# scp -r /usr/local/spark hadoop004:/usr/local/
# scp -r /usr/local/spark hadoop005:/usr/local/
# scp -r /usr/local/spark hadoop006:/usr/local/
# scp -r /usr/local/spark hadoop007:/usr/local/
# scp -r /usr/local/spark hadoop008:/usr/local/
Start the Spark cluster
Start the ZooKeeper cluster (all ZK nodes)
# zkServer.sh start
Start the Hadoop cluster (hadoop001)
## start HDFS HA
# start-dfs.sh
## start YARN HA
# start-yarn.sh
Start the Spark cluster
## start all workers and the primary master -- hadoop001
# /usr/local/spark/sbin/start-all.sh
## start the standby masters, one on each standby node -- hadoop002-004
# /usr/local/spark/sbin/start-master.sh
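To smoke-test the standalone cluster, submit the bundled SparkPi example against the master (the examples jar name below matches the Spark 3.0.1 / Scala 2.12 distribution; adjust it if yours differs):
# /usr/local/spark/bin/spark-submit --master spark://hadoop001:7077 --class org.apache.spark.examples.SparkPi /usr/local/spark/examples/jars/spark-examples_2.12-3.0.1.jar 100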
Verify cluster high availability
Use the kill command to terminate the Master process on hadoop001; one of the standby Master processes will then become the active Master. Start the Master service on hadoop001 again and it rejoins the cluster as a standby Master.
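A sketch of that test on hadoop001 (in jps output the standalone Spark master shows up simply as Master):
## find and kill the Master process on hadoop001
# jps | grep -w Master
# kill <pid printed above>
## after a standby has taken over, bring hadoop001 back as a standby master
# /usr/local/spark/sbin/start-master.sh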
Cluster Start/Stop Order
Start
ZooKeeper -> Hadoop(DFS) -> Yarn -> HBase -> Spark Master -> Spark Worker -> Kafka -> Flink
## ZK: run on every ZK node
# zkServer.sh start
## Hadoop primary node (hadoop001)
# start-dfs.sh
## YARN primary node (hadoop002 or hadoop001)
# start-yarn.sh
## HBase primary node (hadoop008)
# start-hbase.sh
## Spark primary master node (hadoop001)
# /usr/local/spark/sbin/start-all.sh
## Spark standby master nodes (hadoop002-004)
# /usr/local/spark/sbin/start-master.sh
## Kafka: run on every Kafka node
# kafka-server-start.sh -daemon /usr/local/kafka/config/server.properties
## Flink primary node (hadoop001)
# /usr/local/flink/bin/start-cluster.sh
Stop
Flink -> Kafka -> Spark Worker -> Spark Master -> HBase -> Yarn -> Hadoop(DFS) -> ZooKeeper
## Flink primary node (hadoop001)
# /usr/local/flink/bin/stop-cluster.sh
## Kafka: run on every Kafka node
# kafka-server-stop.sh
## Spark standby master nodes (hadoop002-004)
# /usr/local/spark/sbin/stop-master.sh
## Spark primary master node (hadoop001)
# /usr/local/spark/sbin/stop-all.sh
## HBase primary node (hadoop008)
# stop-hbase.sh
## YARN primary node (hadoop002)
# stop-yarn.sh
## Hadoop primary node (hadoop001)
# stop-dfs.sh
## ZK: run on every ZK node
# zkServer.sh stop
Web UIs of Common Hadoop Ecosystem Components
Service | Port |
---|---|
YARN NodeManager | 8042 |
Hadoop HDFS NameNode | 9870 |
Hadoop HDFS DataNode | 9864 |
HDFS JournalNode | 8480 |
Yarn ResourceManager | 8088 |
HBase Master | 16010 |
HBase RegionServer | 16030 |
Spark Master | 8082 |
Spark Worker | 8083 |
Kafka (not a web UI port) | 9092 |
Flink | 8081 |
ZooKeeper (not a web UI port) | 2181 |