安装 Hadoop 3.2.1 集群

本文档详细介绍了如何在多台机器上安装和配置Hadoop3.2.1的高可用(HA)集群,包括设置免密码登录、配置core-site.xml、hdfs-site.xml、yarn-site.xml、mapred-site.xml等核心文件,以及启动和检查Zookeeper、JournalNode、NameNode、DataNode、ResourceManager和NodeManager的步骤。

前提:安装 hadoop 前需要先配置机器间免密码登录(使用 aitp 账号),该部分配置请参考 SSH 免密码登录的相关文档。

在 hadoop1 上执行:
将 hadoop-3.2.1.tar.gz 上传到/home/aitp/software 目录下, 并解压
编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/core-site.xml

  1.  
  2.  
  3. <?xml version="1.0" encoding="UTF-8"?>
  4. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  5. <configuration>
  6.   <property>
  7.     <name>fs.defaultFS</name>
  8.     <value>hdfs://mycluster</value>
  9.   </property>
  10.   <property>
  11.     <name>hadoop.tmp.dir</name>
  12.     <value>/data/hadoop/tmpdir</value>
  13.   </property>
  14.   <property>
  15.     <name>ha.zookeeper.quorum</name>
  16.     <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
  17.   </property>
  18. </configuration>

编辑:/home/aitp/software/hadoop-3.2.1/etc/hadoop/hdfs-site.xml

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3.  
  4. <configuration>
  5.   <!-- hdfs HA configuration-->
  6.   <!-- all default configuration can be found at https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml ("stable" can be replaced by a version such as r3.2.1) -->
  7.  
  8.   <property>
  9.     <name>dfs.ha.automatic-failover.enabled</name>
  10.     <value>true</value>
  11.   </property>
  12.   <!-- dfs.nameservices 这里需要与core-site.xml fs.defaultFS 的名称一致-->
  13.   <property>
  14.     <name>dfs.nameservices</name>
  15.     <value>mycluster</value>
  16.   </property>
  17.   <!-- 定义集群中 namenode 列表,这里定义了三个namenode,分别是nn1,nn2,nn3-->
  18.   <property>
  19.     <name>dfs.ha.namenodes.mycluster</name>
  20.     <value>nn1,nn2,nn3</value>
  21.   </property>
  22.   <!-- 各 namenode 的 rpc 地址定义,这里要和 dfs.ha.namenodes.mycluster 定义的列表对应 -->
  23.   <property>
  24.     <name>dfs.namenode.rpc-address.mycluster.nn1</name>
  25.     <value>hadoop1:8020</value>
  26.   </property>
  27.   <property>
  28.     <name>dfs.namenode.rpc-address.mycluster.nn2</name>
  29.     <value>hadoop2:8020</value>
  30.   </property>
  31.   <property>
  32.     <name>dfs.namenode.rpc-address.mycluster.nn3</name>
  33.     <value>hadoop3:8020</value>
  34.   </property>
  35.   <!-- 各 namenode 的 http 地址定义,这里要和 dfs.ha.namenodes.mycluster 定义的列表对应 -->
  36.   <property>
  37.     <name>dfs.namenode.http-address.mycluster.nn1</name>
  38.     <value>hadoop1:9870</value>
  39.   </property>
  40.   <property>
  41.     <name>dfs.namenode.http-address.mycluster.nn2</name>
  42.     <value>hadoop2:9870</value>
  43.   </property>
  44.   <property>
  45.     <name>dfs.namenode.http-address.mycluster.nn3</name>
  46.     <value>hadoop3:9870</value>
  47.   </property>
  48. <!-- 指定NameNode的元数据在JournalNode上的存放位置 -->
  49.   <property>
  50.     <name>dfs.namenode.shared.edits.dir</name>
  51.     <value>qjournal://hadoop1:8485;hadoop2:8485;hadoop3:8485/mycluster</value>
  52.   </property>
  53.   <!-- 指定JournalNode在本地磁盘存放数据的位置 -->
  54.   <property>
  55.     <name>dfs.journalnode.edits.dir</name>
  56.     <value>/data/hadoop/journalnode/data</value>
  57.   </property>
  58.   <!-- 配置失败自动切换实现方式 -->
  59.   <property>
  60.     <name>dfs.client.failover.proxy.provider.mycluster</name>
  61.     <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  62.   </property>
  63.   <!-- 配置隔离机制方法,多个机制用换行分割,即每个机制占用一行-->
  64.   <property>
  65.     <name>dfs.ha.fencing.methods</name>
  66.     <value>sshfence</value>
  67.   </property>
  68.   <!-- 使用sshfence隔离机制时需要ssh免登陆 -->
  69.   <property>
  70.     <name>dfs.ha.fencing.ssh.private-key-files</name>
  71.     <value>/home/aitp/.ssh/id_rsa</value>
  72.   </property>
  73.   <!-- 配置sshfence隔离机制超时时间 -->
  74.   <property>
  75.     <name>dfs.ha.fencing.ssh.connect-timeout</name>
  76.     <value>30000</value>
  77.   </property>
  78.   <property>
  79.     <name>dfs.journalnode.http-address</name>
  80.     <value>0.0.0.0:8480</value>
  81.   </property>
  82.   <property>
  83.     <name>dfs.journalnode.rpc-address</name>
  84.     <value>0.0.0.0:8485</value>
  85.   </property>
  86.   <!-- hdfs HA configuration end-->
  87.  
  88.   <property>
  89.     <name>dfs.replication</name>
  90.     <value>3</value>
  91.   </property>
  92.   <property>
  93.     <name>dfs.namenode.name.dir</name>
  94.     <value>/data/hadoop/hdfs/namenode</value>
  95.   </property>
  96.   <property>
  97.     <name>dfs.datanode.data.dir</name>
  98.     <value>/data/hadoop/hdfs/datanode</value>
  99.   </property>
  100.   <!--开启webhdfs接口访问-->
  101.   <property>
  102.     <name>dfs.webhdfs.enabled</name>
  103.     <value>true</value>
  104.   </property>
  105. <!-- 关闭权限验证,hive可以直连 -->
  106.   <property>
  107.     <name>dfs.permissions.enabled</name>
  108.     <value>false</value>
  109.   </property>
  110. </configuration>

编辑:/home/aitp/software/hadoop-3.2.1/etc/hadoop/yarn-site.xml

  1. <?xml version="1.0"?>
  2. <configuration>
  3.  
  4.   <!-- yarn ha configuration-->
  5.   <property>
  6.     <name>yarn.resourcemanager.ha.enabled</name>
  7.     <value>true</value>
  8.   </property>
  9.   <!-- 定义集群名称 -->
  10.   <property>
  11.     <name>yarn.resourcemanager.cluster-id</name>
  12.     <value>cluster1</value>
  13.   </property>
  14.   <!-- 定义本机在高可用集群中的 id,要与 yarn.resourcemanager.ha.rm-ids 定义的值对应;如果本机不作为 resource manager,则删除这项配置。-->
  15.   <property>
  16.     <name>yarn.resourcemanager.ha.id</name>
  17.     <value>rm1</value>
  18.   </property>
  19.   <!-- 定义高可用集群中的 id 列表 -->
  20.   <property>
  21.     <name>yarn.resourcemanager.ha.rm-ids</name>
  22.     <value>rm1,rm2</value>
  23.   </property>
  24.   <!-- 定义高可用RM集群具体是哪些机器 -->
  25.   <property>
  26.     <name>yarn.resourcemanager.hostname.rm1</name>
  27.     <value>hadoop1</value>
  28.   </property>
  29.   <property>
  30.     <name>yarn.resourcemanager.hostname.rm2</name>
  31.     <value>hadoop2</value>
  32.   </property>
  33.   <property>
  34.     <name>yarn.resourcemanager.webapp.address.rm1</name>
  35.     <value>hadoop1:8088</value>
  36.   </property>
  37.   <property>
  38.     <name>yarn.resourcemanager.webapp.address.rm2</name>
  39.     <value>hadoop2:8088</value>
  40.   </property>
  41.   <property>
  42.     <name>hadoop.zk.address</name>
  43.     <value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
  44.   </property>
  45.  
  46.   <!-- Site specific YARN configuration properties -->
  47.   <property>
  48.     <name>yarn.nodemanager.aux-services</name>
  49.     <value>mapreduce_shuffle</value>
  50.   </property>
  51. </configuration>

编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/mapred-site.xml

  1. <?xml version="1.0"?>
  2. <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
  3. <configuration>
  4.   <property>
  5.     <name>mapreduce.framework.name</name>
  6.     <value>yarn</value>
  7.   </property>
  8.   <property>
  9.     <name>mapreduce.application.classpath</name>
  10.     <value> 
  11.         /home/aitp/software/hadoop-3.2.1/share/hadoop/common/*,
  12.         /home/aitp/software/hadoop-3.2.1/share/hadoop/common/lib/*,
  13.         /home/aitp/software/hadoop-3.2.1/share/hadoop/hdfs/*,
  14.         /home/aitp/software/hadoop-3.2.1/share/hadoop/hdfs/lib/*,
  15.         /home/aitp/software/hadoop-3.2.1/share/hadoop/mapreduce/*,
  16.         /home/aitp/software/hadoop-3.2.1/share/hadoop/mapreduce/lib/*,
  17.         /home/aitp/software/hadoop-3.2.1/share/hadoop/yarn/*,
  18.         /home/aitp/software/hadoop-3.2.1/share/hadoop/yarn/lib/*
  19.     </value>
  20.   </property>
  21.  
  22. </configuration>

编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/hadoop-env.sh(/usr/lib/jvm/jre-1.8.0换成实际jdk安装目录)

  1.  
  2.  
  3. # The java implementation to use. By default, this environment
  4. # variable is REQUIRED on ALL platforms except OS X!
  5. # export JAVA_HOME=
  6. export JAVA_HOME=/usr/lib/jvm/jre-1.8.0
  7.  
  8. # Some parts of the shell code may do special things dependent upon
  9. # the operating system.  We have to set this here. See the next
  10. # section as to why....
  11. export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
  12. export HADOOP_PID_DIR=/home/aitp/software/hadoop-3.2.1/pid
  13. export HADOOP_LOG_DIR=/var/log/hadoop

编辑 /home/aitp/software/hadoop-3.2.1/etc/hadoop/yarn-env.sh(/usr/lib/jvm/jre-1.8.0换成实际jdk安装目录)

  1. # Specify the max heapsize for the ResourceManager.  If no units are
  2. # given, it will be assumed to be in MB.
  3. # This value will be overridden by an Xmx setting specified in either
  4. # HADOOP_OPTS and/or YARN_RESOURCEMANAGER_OPTS.
  5. # Default is the same as HADOOP_HEAPSIZE_MAX
  6. #export YARN_RESOURCEMANAGER_HEAPSIZE=
  7. export JAVA_HOME=/usr/lib/jvm/jre-1.8.0

编辑 /home/aitp/software/hadoop-3.2.1/sbin/start-dfs.sh, /home/aitp/software/hadoop-3.2.1/sbin/stop-dfs.sh,在脚本开始添加

  1. HDFS_NAMENODE_USER=aitp
  2. HDFS_DATANODE_USER=aitp
  3. HDFS_JOURNALNODE_USER=aitp
  4. HDFS_ZKFC_USER=aitp

编辑 /home/aitp/software/hadoop-3.2.1/sbin/start-yarn.sh, /home/aitp/software/hadoop-3.2.1/sbin/stop-yarn.sh,在脚本开始添加

  1. YARN_RESOURCEMANAGER_USER=aitp
  2. YARN_NODEMANAGER_USER=aitp

修改 /home/aitp/software/hadoop-3.2.1/etc/hadoop/workers 为如下内容

  1. hadoop1
  2. hadoop2
  3. hadoop3

拷贝 hadoop-3.2.1 到 hadoop2 hadoop3

  1. rsync -auvp /home/aitp/software/hadoop-3.2.1 aitp@hadoop2:/home/aitp/software
  2.  
  3. rsync -auvp /home/aitp/software/hadoop-3.2.1 aitp@hadoop3:/home/aitp/software

在hadoop2 上执行
需要修改yarn-site.xml的yarn.resourcemanager.ha.id,改为如下内容

  1.   <property>
  2.     <name>yarn.resourcemanager.ha.id</name>
  3.     <value>rm2</value>
  4.   </property>

在hadoop3上执行

删除如下property

  1.   <property>
  2.     <name>yarn.resourcemanager.ha.id</name>
  3.     <value>rm1</value>
  4.   </property>

启动

启动顺序 Zookeeper->JournalNode->格式化NameNode->在ZK中创建命名空间(zkfc -formatZK)->NameNode->Datanode->ResourceManager->NodeManager

启动zookeeper

在所有机器上执行,顺序 hadoop1 hadoop2 hadoop3

  1. # 注意,如果使用zsh 需要切换回bash
  2. #chsh -s /usr/bin/bash
  3. # 如果想用 zsh 直接执行,需要使用如下命令;emulate 命令必须安装 oh my zsh 才有。
  4. # emulate sh -c '/home/aitp/software/zookeeper-3.5.6/bin/zkServer.sh start'
  5. /home/aitp/software/zookeeper-3.5.6/bin/zkServer.sh start
  6. /home/aitp/software/zookeeper-3.5.6/bin/zkServer.sh status

启动journalnode

在所有机器上执行,顺序 hadoop1 hadoop2 hadoop3

  1. # 注意,如果使用zsh 需要切换回bash
  2. #chsh -s /usr/bin/bash
  3. /home/aitp/software/hadoop-3.2.1/sbin/hadoop-daemon.sh start journalnode
  4. # 或者通过 /home/aitp/software/hadoop-3.2.1/bin/hdfs --daemon start journalnode

格式化 Namenode

在hadoop1上执行

  1. # 注意,如果使用zsh 需要切换回bash
  2. #chsh -s /usr/bin/bash
  3. /home/aitp/software/hadoop-3.2.1/bin/hadoop namenode -format
  4. # 同步格式化之后的元数据到其他namenode,不然可能起不来
  5. rsync -auvp /data/hadoop/hdfs/namenode/current aitp@hadoop2:/data/hadoop/hdfs/namenode
  6. rsync -auvp /data/hadoop/hdfs/namenode/current aitp@hadoop3:/data/hadoop/hdfs/namenode
  7. # 格式化ZK
  8. hdfs zkfc -formatZK

停止 journalnode

在所有机器上执行

  1. /home/aitp/software/hadoop-3.2.1/sbin/hadoop-daemon.sh stop journalnode
  2. # 或者通过 /home/aitp/software/hadoop-3.2.1/bin/hdfs --daemon stop journalnode

启动 hadoop

在hadoop1 上执行

  1. # 必须在bash 环境下执行,zsh 兼容模式也不行
  2. start-dfs.sh
  3. start-yarn.sh
  4. hdfs haadmin -getAllServiceState

正常启动后,通过 jps 命令可以看到如下进程

2193 QuorumPeerMain
5252 JournalNode
4886 NameNode
5016 DataNode
5487 DFSZKFailoverController

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值