第5章 YARN:资源调度平台
5.3 YARN集群配置
5.3.1 简单配置
(1) yarn-site.xml
代码语言:xml
<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Minimal YARN settings: where the ResourceManager runs and which
       auxiliary service each NodeManager must host. -->

  <!-- Host on which the ResourceManager runs. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
    <description>ResourceManager所在的节点</description>
  </property>

  <!-- Auxiliary service started inside every NodeManager; MapReduce jobs
       need the mapreduce_shuffle service to move map output to reducers. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>排序服务</description>
  </property>
</configuration>
(2) mapred-site.xml
代码语言:xml
<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>启用yarn作为资源管理框架</description>
  </property>
</configuration>
(3) slaves
代码语言:text
node1
node2
node3
这个文件在《3.4 HDFS集群模式》(http://blog.csdn.net/chengyuqiang/article/details/72058013)中已经配置过了,这里不再重复配置。 需要注意:这个文件是指定子节点的位置,同时也是帮助Yarn指定NodeManager启动的位置。即如果HDFS没有指定slaves的话,将没有DataNode;如果Yarn没有指定slaves的话,将没有NodeManager。
5.3.2 优化配置
YARN参数调优
(1) yarn-site.xml
代码语言:xml
<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Tuned YARN settings: scheduler choice, ResourceManager endpoints on
       node1, per-NodeManager resources, container allocation limits, and
       log handling. Property order follows the original listing. -->

  <!-- Scheduler implementation used by the ResourceManager. -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>

  <!-- Auxiliary service started inside every NodeManager; its implementing
       class is set by yarn.nodemanager.aux-services.mapreduce_shuffle.class
       further down. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>排序服务</description>
  </property>

  <!-- ResourceManager endpoint that clients submit applications to. -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>node1:8032</value>
    <description>客户端提交任务的ResourceManager的URI</description>
  </property>

  <!-- Number of virtual cores each NodeManager offers to containers. -->
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>10</value>
  </property>

  <!-- Local directory used by the NodeManager for container working files. -->
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/hadoop/yarn/local</value>
  </property>

  <!-- Classpath handed to YARN applications.
       NOTE(review): the last two entries rely on $YARN_HOME; confirm that
       variable is defined in this environment (Hadoop 2.x and later
       normally use HADOOP_YARN_HOME). -->
  <property>
    <name>yarn.application.classpath</name>
    <value>$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,$YARN_HOME/*,$YARN_HOME/lib/*</value>
  </property>

  <!-- Physical-memory limit enforcement for containers is switched off. -->
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>

  <!-- ResourceManager endpoint for administrative commands. -->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>node1:8033</value>
    <description>管理命令的URI</description>
  </property>

  <!-- Destination directory for aggregated application logs
       (used because yarn.log-aggregation-enable is true below). -->
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/yarn1/var/log/hadoop-yarn/apps</value>
  </property>

  <!-- ResourceManager endpoint that NodeManagers report to. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>node1:8031</value>
  </property>

  <!-- Largest memory size (MB) a single container request may ask for.
       NOTE(review): 27648 is larger than
       yarn.nodemanager.resource.memory-mb (27640) below, so a request of
       this size could never fit on one node; confirm which of the two
       values is intended. -->
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>27648</value>
  </property>

  <!-- Virtual-memory limit enforcement for containers is switched off. -->
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>

  <!-- ResourceManager web UI endpoint. -->
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>node1:8088</value>
    <description>ResourceManager的web服务URI</description>
  </property>

  <!-- Memory (MB) each NodeManager offers to containers. -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>27640</value>
  </property>

  <!-- Implementation class of the mapreduce_shuffle auxiliary service. -->
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>

  <!-- Collect container logs into yarn.nodemanager.remote-app-log-dir. -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <!-- Largest number of virtual cores a single container request may ask for.
       NOTE(review): 96 is larger than yarn.nodemanager.resource.cpu-vcores
       (10) above; confirm the intended limit. -->
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>96</value>
  </property>

  <!-- ResourceManager endpoint that ApplicationMasters use to request resources. -->
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>node1:8030</value>
  </property>

  <!-- Smallest memory size (MB) the scheduler grants to a container. -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>9216</value>
  </property>

  <!-- Local directory in which the NodeManager stores container logs. -->
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/hadoop/yarn/logs</value>
  </property>
</configuration>
(2) mapred-site.xml
代码语言:xml
<?xml version="1.0" ?>
<configuration>
  <!-- Tuned MapReduce settings: container sizes and JVM heap limits for
       map tasks, reduce tasks and the ApplicationMaster, the sort buffer
       size, and the execution framework. -->

  <!-- Map task: container memory (MB) and JVM options. -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>2048</value>
    <description>map的最大可使用内存</description>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx1228m</value>
    <description>map的堆内存</description>
  </property>

  <!-- Reduce task: container memory (MB) and JVM options. -->
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>3072</value>
    <description>reduce的最大可使用内存</description>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx2457m</value>
    <description>reduce堆内存</description>
  </property>

  <!-- JVM options for the MapReduce ApplicationMaster. -->
  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx2457m</value>
    <description>mapreduce的参数</description>
  </property>

  <!-- Buffer size (MB) used while sorting map output. -->
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>614</value>
    <description>数据排序时的内存大小</description>
  </property>

  <!-- Run MapReduce jobs on YARN. -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>启用yarn作为资源管理框架</description>
  </property>
</configuration>
(3) slaves 同上。