Compiling and Running MapReduce Programs in Eclipse


Detailed configuration guide

MapReduce was covered quite a while back; the detailed content and step-by-step operations are in the document below. Download link: https://pan.baidu.com/s/1BIBpClKy2xcqAJtxUJoYVA (extraction code: ctca)

1. WordCount

Count how many times each word appears across a set of files. The code is as follows; a sketch of two common run-time fixes follows the three listings.

  • TokenizerMapper.java

package com.test;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

    // Split each input line on spaces and emit (word, 1) for every word
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split(" ");
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }

}
  • IntSumReducer.java
package com.test;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Running count of reduce calls, used to number the output lines;
    // it is only a single continuous numbering when the job uses one reducer
    public static Integer num = 0;

    public void reduce(Text key2, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        Integer count = 0;
        num++;
        // Sum the 1s emitted by the mapper for this word
        for (IntWritable value : values) {
            count += value.get();
        }
        // Prefix the word with its running line number
        Text key1 = new Text(num.toString() + " " + key2);
        context.write(key1, new IntWritable(count));
    }

}
  • WordCount.java
package com.test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public WordCount(){

    }

    public static void main(String[] args)throws Exception {
        Configuration conf=new Configuration();
        Job job=Job.getInstance(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.119.128:9000/input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.119.128:9000/output"));
        System.exit(job.waitForCompletion(true)?0:1);

    }

}
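Two pitfalls are worth guarding against when re-running this from Eclipse: Hadoop refuses to start a job whose output directory already exists, and the static num counter in IntSumReducer only produces one continuous numbering when a single reduce task runs. A minimal sketch of both fixes, assuming the same HDFS address as above; the snippet slots into main() right after Job.getInstance() (FileSystem comes from org.apache.hadoop.fs):

        // Delete a leftover output directory so reruns don't fail
        // with FileAlreadyExistsException
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path("hdfs://192.168.119.128:9000/output");
        if (fs.exists(out)) {
            fs.delete(out, true); // recursive delete
        }
        // One reduce task, so the static counter yields a single sequence
        job.setNumReduceTasks(1);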

Run result

2. RemoveSame

Remove duplicate lines (words) from a set of files. The mapper emits each line as a key with a NullWritable value; because the shuffle groups identical keys, each distinct line reaches the reducer exactly once, so writing the key once removes the duplicates.

  • rsmapper.java

package removesame;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class rsmapper extends Mapper<Object, Text, Text, NullWritable> {

    // Emit the whole line as the key; identical lines collapse
    // into a single key during the shuffle
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        context.write(new Text(line), NullWritable.get());
    }
}
  • rsreduce.java
package removesame;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class rsreduce extends Reducer<Text, NullWritable, IntWritable, Text> {
    public static int num = 0;

    public void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Each distinct line arrives here exactly once, so writing
        // the key once per group drops all duplicates
        context.write(new IntWritable(num), key);
        num++;
    }
}
  • rsmapreduce.java
package removesame;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class rsmapreduce {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the default filesystem at the local disk so the job
        // reads and writes local paths instead of HDFS
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(rsmapreduce.class);
        job.setMapperClass(rsmapper.class);
        job.setReducerClass(rsreduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path("F:\native_file\removesame\input"));
        FileOutputFormat.setOutputPath(job, new Path("F:\native_file\removesame\output"));

        if (!job.waitForCompletion(true))
            return;
    }

}
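The fs.defaultFS setting above only redirects filesystem access; whether the map and reduce tasks themselves run inside the local JVM is governed by a separate property. A minimal sketch pinning both to local mode, using standard Hadoop 2.x configuration keys:

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");           // read and write the local filesystem
        conf.set("mapreduce.framework.name", "local");  // run map/reduce tasks in-process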

The result is as follows

3. Sort

Use MapReduce to sort a set of numeric data. The sorting itself happens in the shuffle phase, which delivers keys to the reducer in sorted order; see the comparator sketch after the listing for how that order could be customized.

The code is as follows.

package sort;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Sort {

    public static class Map extends Mapper<Object, Text, IntWritable, NullWritable> {
        private static IntWritable data = new IntWritable();

        // Parse each line as an integer and emit it as the key;
        // the shuffle phase sorts the keys, which does the actual sorting
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, NullWritable.get());
        }
    }

    public static class Reduce extends Reducer<IntWritable, NullWritable, IntWritable, NullWritable> {
        // Keys arrive already sorted; note that duplicate numbers collapse
        // to one output line, since each distinct key is written once
        public void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the default filesystem at the local disk so local files can be read
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf, "Data Sort");
        job.setJarByClass(Sort.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.setInputPaths(job, new Path("F:\\native_file\\sort\\input"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\native_file\\sort\\output"));
        boolean finish = job.waitForCompletion(true);
        if (finish) {
            System.out.println("Congratulations");
        }
    }

}
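The reducer above performs no comparisons of its own: the framework sorts keys during the shuffle using the key class's comparator, so ascending order falls out of IntWritable's natural ordering. If descending order were wanted, a custom sort comparator could be plugged in; a minimal sketch, where DescendingIntComparator is a made-up name for illustration:

package sort;

import org.apache.hadoop.io.IntWritable;

// Inverts IntWritable's natural (ascending) byte-level comparison
public class DescendingIntComparator extends IntWritable.Comparator {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return -super.compare(b1, s1, l1, b2, s2, l2);
    }
}

It would be registered in main() before the job is submitted:

        job.setSortComparatorClass(DescendingIntComparator.class);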

Run result

Sorted result
