大数据hadoop练习___出租车MRWordCount案例

2022-11-30 13:51:52 浏览数 (1)

WordCountDemo

计算出10月1日这天每小时的载客量

JobMain

代码语言:java
package input.mapreduceT1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class JobMain  {
    //该方法用于指定一个job任务
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "taxi");

        //设置驱动方法
       job.setJarByClass(JobMain.class);

        //设置map,reduce
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        //设置map
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        //设置reduce
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        //输入输出路径
        FileInputFormat.addInputPath(job,new Path("F:\Personal\Desktop\day05\代码\day05_mapreduce\src\main\java\租车\1.txt"));
        FileOutputFormat.setOutputPath(job,new Path("F:\Personal\Desktop\day05\代码\day05_mapreduce\src\main\java\output\Test1.5"));

        boolean b = job.waitForCompletion(true);
        System.exit(b?0:1);
    }
}

WordCountMapper

代码语言:java
package input.mapreduceT1;



import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;

/**
 * Mapper that selects qualifying taxi records and keys them by hour of day.
 *
 * <p>Generic parameters:
 * <ul>
 *   <li>KEYIN: type of K1 ({@code LongWritable})</li>
 *   <li>VALUEIN: type of V1 ({@code Text}, one input line)</li>
 *   <li>KEYOUT: type of K2 ({@code Text}, the two-digit hour)</li>
 *   <li>VALUEOUT: type of V2 ({@code Text}, the unchanged input line)</li>
 * </ul>
 *
 * <p>Expected record layout (comma separated), for example
 * {@code 214704,4,2,20121001081102,116.5546341,39.9706421,0,128,1}:
 * device id, trigger event, operating status, GPS time, GPS longitude, GPS latitude,
 * GPS speed, GPS heading, GPS status.
 */
public class WordCountMapper  extends Mapper<LongWritable, Text,Text,Text>{
    /** Number of fields a record must have; the last one read is the GPS status at index 8. */
    private static final int REQUIRED_FIELD_COUNT = 9;

    /** Length of the {@code yyyyMMddHH} prefix taken from the GPS time field. */
    private static final int DATE_HOUR_LENGTH = 10;

    /**
     * Emits {@code (hour, line)} for records whose trigger event is "1", whose GPS status is
     * "1", and whose GPS date (month and day) is October 1st. Records that are too short or
     * whose GPS time is shorter than {@code yyyyMMddHH} are skipped instead of throwing.
     *
     * @param key     K1 of the input record (not used)
     * @param value   one comma-separated input line
     * @param context sink for the emitted key/value pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] split = value.toString().split(",");
        // Index 8 is read below, so a record needs all nine fields (the old check was >= 4).
        if (split.length < REQUIRED_FIELD_COUNT) {
            return;
        }

        // Trigger event "1": passenger boarding.
        if (!"1".equals(split[1])) {
            return;
        }

        // GPS status "1": the fix is treated as valid.
        if (!"1".equals(split[8])) {
            return;
        }

        String gpsTime = split[3];
        if (gpsTime.length() < DATE_HOUR_LENGTH) {
            // Malformed timestamp; substring below would otherwise throw.
            return;
        }

        // yyyyMMddHH
        String dateHour = gpsTime.substring(0, DATE_HOUR_LENGTH);
        // Keep only October 1st: the yyyyMMdd part must end with "1001".
        if (dateHour.substring(0, 8).endsWith("1001")) {
            // Key is the hour of day (HH); value is the whole original record.
            context.write(new Text(dateHour.substring(8, 10)), value);
        }
    }
}

WordCountReducer

代码语言:java
package input.mapreduceT1;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
/*
  四个泛型解释:
    KEYIN:  K2类型
    VALULEIN: V2类型

    KEYOUT: K3类型
    VALUEOUT:V3类型
 */

public class WordCountReducer extends Reducer<Text,Text,Text,Text>{
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        /**
         * 载客量
         */
        int CarryingCapacity=0;
        HashMap<String, ArrayList<String>> map = new HashMap<>();
        ArrayList<String> list = new ArrayList<>();
        for (Text value : values) {
            String[] split = value.toString().split(",");
            if (split[2].equals("1")){
                CarryingCapacity  ;
            }

        }

        context.write(key,new Text(CarryingCapacity ""));
        //每小时每个车载客量
    }
}

0 人点赞