Hadoop HA: NameNode HA


Set up the Hadoop cluster first

Configure ZooKeeper

Automatic failover in Hadoop HA requires ZooKeeper, so configure ZooKeeper and start it first.
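
For reference, a minimal ZooKeeper setup might look like the following; the installation path /opt/module/zookeeper-3.4.10 and the dataDir are assumptions, and there should be one server.N line for every host listed in ha.zookeeper.quorum:

# zoo.cfg on every ZooKeeper host (paths are assumptions)
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/module/zookeeper-3.4.10/zkData
clientPort=2181
server.1=hadoop101:2888:3888
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888

# write the matching id into dataDir/myid on each host, then start ZooKeeper
$ echo 1 > /opt/module/zookeeper-3.4.10/zkData/myid
$ /opt/module/zookeeper-3.4.10/bin/zkServer.sh start
$ /opt/module/zookeeper-3.4.10/bin/zkServer.sh status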

NameNode HA configuration

Edit the configuration files

Two files mainly need to be configured: hdfs-site.xml and core-site.xml.

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>

  <!-- Number of HDFS replicas -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- The SecondaryNameNode is no longer needed in an HA cluster -->
  <!-- <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop103:50090</value>
  </property> -->

  <!-- Name of the fully distributed cluster (the nameservice) -->
  <property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
  </property>

  <!-- The NameNodes that make up the cluster -->
  <property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2</value>
  </property>

  <!-- RPC address of nn1 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>hadoop101:8020</value>
  </property>

  <!-- RPC address of nn2 -->
  <property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>hadoop102:8020</value>
  </property>

  <!-- HTTP address of nn1 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>hadoop101:50070</value>
  </property>

  <!-- HTTP address of nn2 -->
  <property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>hadoop102:50070</value>
  </property>

  <!-- Location on the JournalNodes where the NameNode metadata (shared edits) is stored -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hadoop101:8485;hadoop102:8485;hadoop103:8485/mycluster</value>
  </property>

  <!-- Fencing method: ensures that only one NameNode serves requests at any given time -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>

  <!-- The sshfence method requires passwordless SSH login -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/root/.ssh/id_rsa</value>
  </property>

  <!-- Local storage directory for the JournalNode -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/module/hadoop-2.7.2/jn/data</value>
  </property>

  <!-- Disable permission checking -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>

  <!-- Client failover proxy provider: determines which NameNode is currently Active -->
  <property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
      
  <!-- Enable automatic failover -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>

  <!-- Dynamic node (include/exclude) configuration -->
  <property>
    <name>dfs.hosts</name>
    <value>/opt/module/hadoop-2.7.2/etc/hadoop/dfs.hosts</value>
  </property>

  <property>
    <name>dfs.hosts.exclude</name>
    <value>/opt/module/hadoop-2.7.2/etc/hadoop/dfs.hosts.exclude</value>
  </property>

</configuration>
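
Note that the files referenced by dfs.hosts and dfs.hosts.exclude must exist on the NameNode hosts before HDFS starts, otherwise the NameNode may fail to start. A minimal sketch, assuming hadoop101 to hadoop103 are the DataNodes:

$ touch /opt/module/hadoop-2.7.2/etc/hadoop/dfs.hosts.exclude
$ cat > /opt/module/hadoop-2.7.2/etc/hadoop/dfs.hosts <<EOF
hadoop101
hadoop102
hadoop103
EOF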

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Default filesystem: the HDFS nameservice defined in hdfs-site.xml -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://mycluster</value>
  </property>

  <!-- Storage directory for files generated while Hadoop is running -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/opt/module/hadoop-2.7.2/data/tmp</value>
  </property>

  <!-- ZooKeeper quorum addresses -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop101:2181,hadoop102:2181,hadoop103:2181,hadoop104:2181</value>
  </property>

  <!-- Tune the IPC retry parameters to avoid ConnectException when connecting to the JournalNode service -->
  <property>
    <name>ipc.client.connect.max.retries</name>
    <value>100</value>
    <description>Indicates the number of retries a client will make to establish a server connection.</description>
  </property>
  <property>
    <name>ipc.client.connect.retry.interval</name>
    <value>10000</value>
    <description>Indicates the number of milliseconds a client will wait for before retrying to establish a server connection.</description>
  </property>

</configuration>

Distribute the configuration files to the other machines.
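
For example, with rsync (hadoop102 and hadoop103 are taken from the configuration above; add any other nodes in your cluster):

$ for host in hadoop102 hadoop103; do rsync -av /opt/module/hadoop-2.7.2/etc/hadoop/ $host:/opt/module/hadoop-2.7.2/etc/hadoop/; done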

Start the cluster

On each JournalNode host, run the following command to start the journalnode service:

$ sbin/hadoop-daemon.sh start journalnode
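
You can check with jps that a JournalNode process is running on each of these hosts:

$ jps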

On [nn1], format the NameNode and start it:

$ bin/hdfs namenode -format

$ sbin/hadoop-daemon.sh start namenode

On [nn2], synchronize nn1's metadata and start the second NameNode:

$ bin/hdfs namenode -bootstrapStandby
$ sbin/hadoop-daemon.sh start namenode

Manually transition nn1 to Active:

$ bin/hdfs haadmin -transitionToActive nn1

Check the service state:

$ bin/hdfs haadmin -getServiceState nn1
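
Note that with dfs.ha.automatic-failover.enabled set to true (as configured above), haadmin normally refuses manual state transitions; if you really want to force one, the --forcemanual flag should work. -getServiceState reports active or standby for each NameNode:

$ bin/hdfs haadmin -transitionToActive --forcemanual nn1
$ bin/hdfs haadmin -getServiceState nn1
$ bin/hdfs haadmin -getServiceState nn2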

Initialize the HA state in ZooKeeper

$ hdfs zkfc -formatZK
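
This creates a znode in ZooKeeper (by default under /hadoop-ha/mycluster) that the ZKFailoverControllers use for leader election. You can verify it with the ZooKeeper CLI; the installation path below is an assumption:

$ /opt/module/zookeeper-3.4.10/bin/zkCli.sh -server hadoop101:2181 ls /hadoop-ha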

Start HDFS

$ sbin/start-dfs.sh
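
With automatic failover enabled, start-dfs.sh should also start a DFSZKFailoverController (zkfc) on each NameNode host; check with jps, and start it manually only if it is missing. A simple failover test is to kill the Active NameNode process and confirm that the other NameNode becomes active (the PID below is a placeholder):

$ jps
$ sbin/hadoop-daemon.sh start zkfc
$ kill -9 <pid-of-active-namenode>
$ bin/hdfs haadmin -getServiceState nn2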