These steps are actually not that complicated.
Step 1: WordCountMap code
package com.czxy.Test01;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each value is one line of input text; split it into words on commas
        String datas = value.toString();
        String[] split = datas.split(",");
        // Emit (word, 1) for every word in the line
        for (String s : split) {
            context.write(new Text(s), new LongWritable(1));
        }
    }
}
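For a hypothetical input line such as hello,world,hello, this map emits the pairs (hello,1), (world,1), (hello,1); the framework then groups the pairs by key before they reach the reducer.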
Step 2: WordCountReduce code
package com.czxy.Test01;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WordCountReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Add up all the 1s the map phase emitted for this word
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        context.write(key, new LongWritable(sum));
    }
}
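Continuing the hypothetical sample above, the reducer receives hello with the values [1, 1] and writes hello 2, while world with [1] becomes world 1.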
Step 3: WordCountDriver code
package com.czxy.Test01;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCountDriver extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "WordCount");
        job.setJarByClass(WordCountDriver.class);
        job.setInputFormatClass(TextInputFormat.class);
        // Input path: create a directory on HDFS first and upload your data file into it
        TextInputFormat.addInputPath(job, new Path("hdfs://192.168.100.105:8020/ccc"));
        job.setMapperClass(WordCountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // Output path; it must not exist yet, or the job will fail to start
        TextOutputFormat.setOutputPath(job, new Path("/bbb"));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int run = ToolRunner.run(new WordCountDriver(), args);
        System.exit(run);
    }
}
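The input path above assumes the data is already on HDFS. A minimal sketch of preparing it (the file name words.txt and its comma-separated content are assumptions; the /ccc directory matches the code above):

# create the input directory on HDFS and upload the data file (words.txt is a placeholder name)
hdfs dfs -mkdir /ccc
hdfs dfs -put words.txt /ccc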
Step 4: Package the project
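Assuming this is a standard Maven project (the jar name in step 6 suggests it is), packaging is one command run from the project root; the jars end up in the target directory:

mvn clean package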
Step 5: Upload the two jars to your Linux system
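One way to upload them, sketched with scp (the target host, user, and directory are assumptions; only original-mapreduce-1.0-SNAPSHOT.jar is named in the original text):

scp target/original-mapreduce-1.0-SNAPSHOT.jar root@192.168.100.105:/root/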
Step 6: Run on the cluster
In your IDE, right-click WordCountDriver and choose Copy Reference (this copies the fully qualified class name, com.czxy.Test01.WordCountDriver).
You can launch the job from any node in the cluster with the hadoop command:
hadoop jar original-mapreduce-1.0-SNAPSHOT.jar com.czxy.Test01.WordCountDriver (the argument after the jar name is the Reference you just copied)
(A reminder: many people hit errors at this point. Make sure the cluster is running first, check the daemons with jps, and only then launch the job with the hadoop command.)
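A sketch of that sequence on the node where the jar lives (start-dfs.sh and start-yarn.sh are the standard Hadoop start scripts; the output file name part-r-00000 is Hadoop's default for a single reducer):

start-dfs.sh
start-yarn.sh
jps
hadoop jar original-mapreduce-1.0-SNAPSHOT.jar com.czxy.Test01.WordCountDriver
# inspect the result in the output directory the driver configured
hdfs dfs -cat /bbb/part-r-00000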