Detailed Configuration Document
MapReduce took quite a while to learn as well; for the full details and step-by-step operations, see the document below. Click to download. Link: https://pan.baidu.com/s/1BIBpClKy2xcqAJtxUJoYVA (extraction code: ctca)
1. WordCount
Counts how many times each word appears across a set of files. The code is as follows. * TokenizerMapper.java
package com.test;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
    // For each input line, emit (word, 1) for every space-separated word.
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split(" ");
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
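As an aside, the canonical Hadoop WordCount mapper does the same job with a StringTokenizer and reuses a single Text/IntWritable pair instead of allocating new objects per word, which reduces garbage-collection pressure and also skips the empty tokens that split(" ") produces on repeated spaces. A minimal sketch of that variant (the class name TokenizerMapper2 is chosen here just for illustration):

package com.test;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class TokenizerMapper2 extends Mapper<Object, Text, Text, IntWritable> {
    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Tokenize on whitespace and reuse the same writables for every emit.
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
        }
    }
}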
- IntSumReducer.java
package com.test;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Counts reduce calls so each output line gets a sequence number.
    // Note: this only works as intended with a single reducer; with several
    // reducers each task would keep its own independent count.
    public static Integer num = 0;

    public void reduce(Text key2, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        Integer count = 0;
        num++;
        // Sum the 1s the mapper emitted for this word.
        for (IntWritable value : values) {
            count += value.get();
        }
        // Prefix the word with its sequence number, e.g. "3 hello".
        Text key1 = new Text(num.toString() + " " + key2);
        context.write(key1, new IntWritable(count));
    }
}
- WordCount.java
package com.test;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {

    public WordCount() {
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input and output live on HDFS; adjust the NameNode address to your cluster.
        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.119.128:9000/input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.119.128:9000/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
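To submit the job, assuming the three classes are packaged into a jar named wordcount.jar (a name chosen here for illustration), the usual invocation is:

hadoop jar wordcount.jar com.test.WordCount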
Run results (screenshot in the original post)
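Since the screenshot is not reproduced here, a small worked example of what the job writes (my own, derived from the code above): with a single input file containing the line "hello world hello", the mapper emits (hello,1), (world,1), (hello,1); the shuffle groups these by word in sorted order, and the reducer produces

1 hello	2
2 world	1

where the default TextOutputFormat separates key and value with a tab.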
2. RemoveSame
Removes duplicated words from a set of files (each input line is treated as one record). * rsmapper.java
package removesame;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class rsmapper extends Mapper<Object, Text, Text, NullWritable> {
    // Emit the whole line as the key; duplicate lines then collapse into a single key during the shuffle.
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        context.write(new Text(line), NullWritable.get());
    }
}
- rsreduce.java
package removesame;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class rsreduce extends Reducer<Text, NullWritable, IntWritable, Text> {
    // Sequence number for output lines; assumes a single reducer.
    public static int num = 0;

    public void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        // Each distinct line reaches reduce exactly once, so writing the key once deduplicates.
        context.write(new IntWritable(num), key);
        num++;
    }
}
- rsmapreduce.java
package removesame;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class rsmapreduce {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Point the job at the local filesystem (file:///) instead of HDFS so it
        // can read and write local paths; the MR framework itself defaults to local mode.
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf, "JobName");
        job.setJarByClass(rsmapreduce.class);
        job.setMapperClass(rsmapper.class);
        job.setReducerClass(rsreduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        // Backslashes must be escaped in Java string literals.
        FileInputFormat.setInputPaths(job, new Path("F:\\native_file\\removesame\\input"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\native_file\\removesame\\output"));
        if (!job.waitForCompletion(true))
            return;
    }
}
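One practical note: FileOutputFormat refuses to start if the output directory already exists, so a rerun fails until that directory is removed. A minimal sketch of deleting it up front with the standard org.apache.hadoop.fs.FileSystem API (the path simply mirrors the one used above; place this in main() before waitForCompletion):

import org.apache.hadoop.fs.FileSystem;

// inside main(), after building conf:
Path out = new Path("F:\\native_file\\removesame\\output");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(out)) {
    fs.delete(out, true); // true = delete recursively
}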
The results are as follows (screenshot in the original post)
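Since the screenshot is not reproduced here, a quick worked example (mine, derived from the code): if the input files together contain the lines apple, banana, apple, the shuffle merges the two apple keys and the job writes

0	apple
1	banana

that is, the distinct lines in sorted order, each prefixed with its sequence number (TextOutputFormat tab-separates key and value).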
3. Sort
Sorts a pile of numbers with MapReduce. The sorting itself comes for free: the shuffle phase sorts map output keys, so it is enough to emit every number as an IntWritable key and have the reducer write the keys back out.
The code is as follows:
package sort;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Sort {
    public static class Map extends Mapper<Object, Text, IntWritable, NullWritable> {
        private static IntWritable data = new IntWritable();

        // The map function: parse each line as an integer and emit it as the key.
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, NullWritable.get());
        }
    }

    public static class Reduce extends Reducer<IntWritable, NullWritable, IntWritable, NullWritable> {
        // Keys arrive already sorted from the shuffle, so just write them back out.
        public void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Use the local filesystem so the job can read local files.
        conf.set("fs.defaultFS", "file:///");
        Job job = Job.getInstance(conf, "Data Sort");
        job.setJarByClass(Sort.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // Backslashes must be escaped in Java string literals.
        FileInputFormat.setInputPaths(job, new Path("F:\\native_file\\sort\\input"));
        FileOutputFormat.setOutputPath(job, new Path("F:\\native_file\\sort\\output"));
        boolean finish = job.waitForCompletion(true);
        if (finish) {
            System.out.println("Congratulations");
        }
    }
}
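The ascending order above falls straight out of MapReduce's default key sorting. If descending order were wanted instead, one option (a sketch; DescComparator is a name chosen here for illustration, and Job.setSortComparatorClass is the standard hook) is to invert the key comparator:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public static class DescComparator extends WritableComparator {
    protected DescComparator() {
        super(IntWritable.class, true); // compare IntWritable keys
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        return -a.compareTo(b); // flip the natural ascending order
    }
}

// in main(), before waitForCompletion:
// job.setSortComparatorClass(DescComparator.class);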
Run results (screenshot in the original post)
Sorted results (screenshot in the original post)
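As a quick worked example (mine, derived from the code): if the input files contain the numbers 3, 1, 2, one per line, the output file holds

1
2
3

Note that duplicate values would collapse to a single line, because the reducer writes each distinct key exactly once.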