前提:安装hadoop前需要机器间免密码登录(用aitp账号),本部分配置可参考
在 hadoop1 上执行:
将 hadoop-3.2.1.tar.gz 上传到/home/aitp/software 目录下, 并解压
编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/core-site.xml
- <?xml version="1.0" encoding="UTF-8"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <configuration>
- <property>
- <name>fs.defaultFS</name>
- <value>hdfs://mycluster</value>
- </property>
- <property>
- <name>hadoop.tmp.dir</name>
- <value>/data/hadoop/tmpdir</value>
- </property>
- <property>
- <name>ha.zookeeper.quorum</name>
- <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
- </property>
- </configuration>
编辑:/home/aitp/software/hadoop-3.2.1/etc/hadoop/hdfs-site.xml
- <?xml version="1.0" encoding="UTF-8"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <configuration>
- <!-- hdfs HA configuration-->
- <!-- all default configuration can be found at https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml ("stable" can be replaced by a specific version such as r3.2.1) -->
- <property>
- <name>dfs.ha.automatic-failover.enabled</name>
- <value>true</value>
- </property>
- <!-- dfs.nameservices 这里需要与core-site.xml 中fs.defaultFS 的名称一致-->
- <property>
- <name>dfs.nameservices</name>
- <value>mycluster</value>
- </property>
- <!-- 定义集群中 namenode 列表,这里定义了三个namenode,分别是nn1,nn2,nn3-->
- <property>
- <name>dfs.ha.namenodes.mycluster</name>
- <value>nn1,nn2,nn3</value>
- </property>
- <!-- 各 namenode(nn1、nn2、nn3)的 RPC 地址,这里要和 dfs.ha.namenodes.mycluster 定义的列表对应 -->
- <property>
- <name>dfs.namenode.rpc-address.mycluster.nn1</name>
- <value>hadoop1:8020</value>
- </property>
- <property>
- <name>dfs.namenode.rpc-address.mycluster.nn2</name>
- <value>hadoop2:8020</value>
- </property>
- <property>
- <name>dfs.namenode.rpc-address.mycluster.nn3</name>
- <value>hadoop3:8020</value>
- </property>
- <!-- 各 namenode(nn1、nn2、nn3)的 HTTP(Web UI)地址,这里要和 dfs.ha.namenodes.mycluster 定义的列表对应 -->
- <property>
- <name>dfs.namenode.http-address.mycluster.nn1</name>
- <value>hadoop1:9870</value>
- </property>
- <property>
- <name>dfs.namenode.http-address.mycluster.nn2</name>
- <value>hadoop2:9870</value>
- </property>
- <property>
- <name>dfs.namenode.http-address.mycluster.nn3</name>
- <value>hadoop3:9870</value>
- </property>
- <!-- 指定NameNode的元数据在JournalNode上的存放位置 -->
- <property>
- <name>dfs.namenode.shared.edits.dir</name>
- <value>qjournal://hadoop1:8485;hadoop2:8485;hadoop3:8485/mycluster</value>
- </property>
- <!-- 指定JournalNode在本地磁盘存放数据的位置 -->
- <property>
- <name>dfs.journalnode.edits.dir</name>
- <value>/data/hadoop/journalnode/data</value>
- </property>
- <!-- 配置失败自动切换实现方式 -->
- <property>
- <name>dfs.client.failover.proxy.provider.mycluster</name>
- <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
- </property>
- <!-- 配置隔离机制方法,多个机制用换行分割,即每个机制占用一行-->
- <property>
- <name>dfs.ha.fencing.methods</name>
- <value>sshfence</value>
- </property>
- <!-- 使用sshfence隔离机制时需要ssh免登陆 -->
- <property>
- <name>dfs.ha.fencing.ssh.private-key-files</name>
- <value>/home/aitp/.ssh/id_rsa</value>
- </property>
- <!-- 配置sshfence隔离机制超时时间 -->
- <property>
- <name>dfs.ha.fencing.ssh.connect-timeout</name>
- <value>30000</value>
- </property>
- <property>
- <name>dfs.journalnode.http-address</name>
- <value>0.0.0.0:8480</value>
- </property>
- <property>
- <name>dfs.journalnode.rpc-address</name>
- <value>0.0.0.0:8485</value>
- </property>
- <!-- hdfs HA configuration end-->
- <property>
- <name>dfs.replication</name>
- <value>3</value>
- </property>
- <property>
- <name>dfs.namenode.name.dir</name>
- <value>/data/hadoop/hdfs/namenode</value>
- </property>
- <property>
- <name>dfs.datanode.data.dir</name>
- <value>/data/hadoop/hdfs/datanode</value>
- </property>
- <!--开启webhdfs接口访问-->
- <property>
- <name>dfs.webhdfs.enabled</name>
- <value>true</value>
- </property>
- <!-- 关闭权限验证,hive可以直连 -->
- <property>
- <name>dfs.permissions.enabled</name>
- <value>false</value>
- </property>
- </configuration>
编辑:/home/aitp/software/hadoop-3.2.1/etc/hadoop/yarn-site.xml
- <?xml version="1.0"?>
- <configuration>
- <!-- yarn ha configuration-->
- <property>
- <name>yarn.resourcemanager.ha.enabled</name>
- <value>true</value>
- </property>
- <!-- 定义集群名称 -->
- <property>
- <name>yarn.resourcemanager.cluster-id</name>
- <value>cluster1</value>
- </property>
- <!-- 定义本机在高可用集群中的 id,要与 yarn.resourcemanager.ha.rm-ids 定义的值对应;如果本机不作为 resource manager,则删除这项配置。-->
- <property>
- <name>yarn.resourcemanager.ha.id</name>
- <value>rm1</value>
- </property>
- <!-- 定义高可用集群中的 id 列表 -->
- <property>
- <name>yarn.resourcemanager.ha.rm-ids</name>
- <value>rm1,rm2</value>
- </property>
- <!-- 定义高可用RM集群具体是哪些机器 -->
- <property>
- <name>yarn.resourcemanager.hostname.rm1</name>
- <value>hadoop1</value>
- </property>
- <property>
- <name>yarn.resourcemanager.hostname.rm2</name>
- <value>hadoop2</value>
- </property>
- <property>
- <name>yarn.resourcemanager.webapp.address.rm1</name>
- <value>hadoop1:8088</value>
- </property>
- <property>
- <name>yarn.resourcemanager.webapp.address.rm2</name>
- <value>hadoop2:8088</value>
- </property>
- <property>
- <name>hadoop.zk.address</name>
- <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
- </property>
- <!-- Site specific YARN configuration properties -->
- <property>
- <name>yarn.nodemanager.aux-services</name>
- <value>mapreduce_shuffle</value>
- </property>
- </configuration>
编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/mapred-site.xml
- <?xml version="1.0"?>
- <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
- <configuration>
- <property>
- <name>mapreduce.framework.name</name>
- <value>yarn</value>
- </property>
- <property>
- <name>mapreduce.application.classpath</name>
- <value>
- /home/aitp/software/hadoop-3.2.1/share/hadoop/common/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/common/lib/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/hdfs/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/hdfs/lib/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/mapreduce/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/mapreduce/lib/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/yarn/*,
- /home/aitp/software/hadoop-3.2.1/share/hadoop/yarn/lib/*
- </value>
- </property>
- </configuration>
编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/hadoop-env.sh(/usr/lib/jvm/jre-1.8.0换成实际jdk安装目录)
- # The java implementation to use. By default, this environment
- # variable is REQUIRED on ALL platforms except OS X!
- # export JAVA_HOME=
- export JAVA_HOME=/usr/lib/jvm/jre-1.8.0
- # Some parts of the shell code may do special things dependent upon
- # the operating system. We have to set this here. See the next
- # section as to why....
- export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
- export HADOOP_PID_DIR=/home/aitp/software/hadoop-3.2.1/pid
- export HADOOP_LOG_DIR=/var/log/hadoop
编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/yarn-env.sh(/usr/lib/jvm/jre-1.8.0换成实际jdk安装目录)
- # Specify the max heapsize for the ResourceManager. If no units are
- # given, it will be assumed to be in MB.
- # This value will be overridden by an Xmx setting specified in either
- # HADOOP_OPTS and/or YARN_RESOURCEMANAGER_OPTS.
- # Default is the same as HADOOP_HEAPSIZE_MAX
- #export YARN_RESOURCEMANAGER_HEAPSIZE=
- export JAVA_HOME=/usr/lib/jvm/jre-1.8.0
编辑 /home/aitp/software/hadoop-3.2.1/sbin/start-dfs.sh, /home/aitp/software/hadoop-3.2.1/sbin/stop-dfs.sh,在脚本开始添加
- HDFS_NAMENODE_USER=aitp
- HDFS_DATANODE_USER=aitp
- HDFS_JOURNALNODE_USER=aitp
- HDFS_ZKFC_USER=aitp
编辑 /home/aitp/software/hadoop-3.2.1/sbin/start-yarn.sh, /home/aitp/software/hadoop-3.2.1/sbin/stop-yarn.sh,在脚本开始添加
- YARN_RESOURCEMANAGER_USER=aitp
- YARN_NODEMANAGER_USER=aitp
修改 /home/aitp/software/hadoop-3.2.1/etc/hadoop/workers 为如下内容
- hadoop1
- hadoop2
- hadoop3
拷贝 hadoop-3.2.1 到 hadoop2 hadoop3
- rsync -auvp /home/aitp/software/hadoop-3.2.1 aitp@hadoop2:/home/aitp/software
- rsync -auvp /home/aitp/software/hadoop-3.2.1 aitp@hadoop3:/home/aitp/software
在hadoop2 上执行
需要修改yarn-site.xml的yarn.resourcemanager.ha.id,改为如下内容
- <property>
- <name>yarn.resourcemanager.ha.id</name>
- <value>rm2</value>
- </property>
在hadoop3上执行
删除如下property
- <property>
- <name>yarn.resourcemanager.ha.id</name>
- <value>rm1</value>
- </property>
启动
启动顺序 Zookeeper->JournalNode->格式化NameNode->格式化ZK(zkfc,创建HA命名空间)->NameNode->DataNode->ResourceManager->NodeManager
启动zookeeper
在所有机器上执行,顺序 hadoop1 hadoop2 hadoop3
- # 注意,如果使用zsh 需要切换回bash
- #chsh -s /usr/bin/bash
- # 如果想用 zsh 直接执行,需要使用如下命令,emulate 命令必须安装 oh my zsh 才有。
- # emulate sh -c '/home/aitp/software/zookepper-3.5.6/bin/zkServer.sh start'
- /home/aitp/software/zookepper-3.5.6/bin/zkServer.sh start
- /home/aitp/software/zookepper-3.5.6/bin/zkServer.sh status
启动journalnode
在所有机器上执行,顺序 hadoop1 hadoop2 hadoop3
- # 注意,如果使用zsh 需要切换回bash
- #chsh -s /usr/bin/bash
-
- /home/aitp/software/hadoop-3.2.1/sbin/hadoop-daemon.sh start journalnode
- # 或者通过 /home/aitp/software/hadoop-3.2.1/bin/hdfs --daemon start journalnode
格式化 Namenode
在hadoop1上执行
- # 注意,如果使用zsh 需要切换回bash
- #chsh -s /usr/bin/bash
- /home/aitp/software/hadoop-3.2.1/bin/hadoop namenode -format
- # 同步格式化之后的元数据到其他namenode,不然可能起不来
- rsync -auvp /data/hadoop/hdfs/namenode/current aitp@hadoop2:/data/hadoop/hdfs/namenode
- rsync -auvp /data/hadoop/hdfs/namenode/current aitp@hadoop3:/data/hadoop/hdfs/namenode
- # 格式化ZK
- hdfs zkfc -formatZK
停止 journalnode
在所有机器上执行
- /home/aitp/software/hadoop-3.2.1/sbin/hadoop-daemon.sh stop journalnode
- # 或者通过 /home/aitp/software/hadoop-3.2.1/bin/hdfs --daemon stop journalnode
启动 hadoop
在hadoop1 上执行
- # 必须在bash 环境下执行,zsh 兼容模式也不行
- start-dfs.sh
- start-yarn.sh
- hdfs haadmin -getAllServiceState
启动完成后,可在各节点执行 jps 检查进程,输出类似如下:
2193 QuorumPeerMain
5252 JournalNode
4886 NameNode
5016 DataNode
5487 DFSZKFailoverController
本文档详细介绍了如何在多台机器上安装和配置Hadoop3.2.1的高可用(HA)集群,包括设置免密码登录、配置core-site.xml、hdfs-site.xml、yarn-site.xml、mapred-site.xml等核心文件,以及启动和检查Zookeeper、JournalNode、NameNode、DataNode、ResourceManager和NodeManager的步骤。
1万+

被折叠的 条评论
为什么被折叠?



