MapReduce Partitioner: partitioning by key value


1. Set the Partitioner class and the number of reducers

job.setPartitionerClass(MyPartitioner.class);
job.setNumReduceTasks(3);
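
For context, here is a minimal driver sketch showing where these two calls sit in a full job setup. The class names (PartitionJob, AccountMapper, AccountReducer) and the command-line paths are assumptions for illustration, not part of the original post.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PartitionJob {
	public static void main(String[] args) throws Exception {
		// Hypothetical driver: wires the custom partitioner into a job
		Job job = Job.getInstance(new Configuration(), "partition-by-account");
		job.setJarByClass(PartitionJob.class);

		job.setMapperClass(AccountMapper.class);       // hypothetical mapper
		job.setReducerClass(AccountReducer.class);     // hypothetical reducer
		job.setOutputKeyClass(AccounterWritable.class);
		job.setOutputValueClass(NullWritable.class);

		job.setPartitionerClass(MyPartitioner.class);  // route records to reducers by account name
		job.setNumReduceTasks(3);                      // one output file per reduce task

		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}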

2. Write the Partitioner class

	/*
	 * Partition the mapper output so that multiple reducers can process
	 * the partition files in parallel.
	 */
	public static class MyPartitioner extends Partitioner<AccounterWritable, NullWritable> {

		private int index = -1;
		private Map<String, Integer> map = new HashMap<>();

		/*
		 * Return value: the partition this record is assigned to.
		 * numPartitions: the number of reduce tasks.
		 */
		@Override
		public int getPartition(AccounterWritable key, NullWritable value, int numPartitions) {

			String currentKey = key.getAccountName();
			// Reuse the partition already assigned to this account name
			if (map.containsKey(currentKey)) {
				return map.get(currentKey);
			} else {
				// First occurrence of this account name: assign the next partition number
				// (assumes the number of distinct accounts does not exceed numPartitions)
				map.put(currentKey, ++index);
				return index;
			}
		}
	}
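
As a quick sanity check outside of a real job, you can feed a few keys through the partitioner by hand; the account names below are made up for illustration:

MyPartitioner partitioner = new MyPartitioner();
for (String name : new String[] {"alice", "bob", "carol", "alice"}) {
	AccounterWritable key = new AccounterWritable();
	key.setAccountName(name);
	int partition = partitioner.getPartition(key, NullWritable.get(), 3);
	System.out.println(name + " -> partition " + partition);
	// Expected output: alice -> 0, bob -> 1, carol -> 2, alice -> 0 (cached)
}

Worth noting: this scheme assumes the number of distinct account names does not exceed the number of reduce tasks, since an index of numPartitions or higher would make the map task fail with an illegal-partition error. Also, because each map task builds its own map, the assignment depends on the order in which accounts are first seen in that task.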

3. The serializable (Writable) key class

package com.gxwz.entity;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;
/**
 * @author com
 *
 */
public class AccounterWritable implements WritableComparable<AccounterWritable> {

	private String accountName = "";
	private double orderAmount = 0D;
	
	public String getAccountName() {
		return accountName;
	}

	public void setAccountName(String accountName) {
		this.accountName = accountName;
	}

	public double getOrderAmount() {
		return orderAmount;
	}

	public void setOrderAmount(double orderAmount) {
		this.orderAmount = orderAmount;
	}

	@Override
	public String toString() {
		return accountName + "\t" + orderAmount;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeUTF(this.accountName);
		out.writeDouble(this.orderAmount);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		this.accountName = in.readUTF();
		this.orderAmount = in.readDouble();
	}

	@Override
	public int compareTo(AccounterWritable o) {
		return this.getAccountName().compareTo(o.getAccountName());
	}
	
}
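
For completeness, a mapper that produces these keys might look like the sketch below. It assumes tab-separated input lines of the form accountName<TAB>orderAmount; the class name AccountMapper and the input format are assumptions, not taken from the original post.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import com.gxwz.entity.AccounterWritable;

// Hypothetical mapper: parses "accountName \t orderAmount" lines and emits the
// AccounterWritable as the key so that MyPartitioner can route it by account name.
public class AccountMapper extends Mapper<LongWritable, Text, AccounterWritable, NullWritable> {

	@Override
	protected void map(LongWritable offset, Text line, Context context)
			throws IOException, InterruptedException {
		String[] fields = line.toString().split("\t");
		if (fields.length < 2) {
			return; // skip malformed lines
		}
		AccounterWritable key = new AccounterWritable();
		key.setAccountName(fields[0]);
		key.setOrderAmount(Double.parseDouble(fields[1]));
		context.write(key, NullWritable.get());
	}
}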

4. Result screenshot

(With three reduce tasks, the output directory contains three files, part-r-00000 through part-r-00002, one per partition.)
