Detailed Steps for Running MapReduce in Cluster Mode (a Beginner-Friendly Walkthrough)

2021-04-13 12:05:53

The process is actually not as complicated as it looks.

Step 1: WordCountMap code

package com.czxy.Test01;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMap extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each value is one line of the input file; split it into words on commas
        String datas = value.toString();
        String[] split = datas.split(",");
        for (String s : split) {
            // Emit each word with a count of 1
            context.write(new Text(s), new LongWritable(1));
        }
    }
}
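For example, the input line hello,world,hello is split on commas and the mapper emits the key/value pairs (hello, 1), (world, 1), (hello, 1).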

Step 2: WordCountReduce code

package com.czxy.Test01;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Add up all the 1s collected for this word
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        context.write(key, new LongWritable(sum));
    }
}
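Continuing the example above, the reducer receives the key hello with the grouped values [1, 1] and writes (hello, 2); the key world with [1] produces (world, 1).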

Step 3: WordCountDriver code

package com.czxy.Test01;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "WordCount");
        job.setJarByClass(WordCountDriver.class);

        // Input path: create a directory on HDFS and upload your data file into it
        job.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://192.168.100.105:8020/ccc"));

        job.setMapperClass(WordCountMap.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Output path: must not already exist on HDFS
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path("/bbb"));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int run = ToolRunner.run(new WordCountDriver(), args);
        System.exit(run);
    }
}
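Note that the input directory must exist on HDFS and contain your data file before the job runs, and the output directory must not exist yet. For example (the file name wordcount.txt is just a placeholder for your own data file; /ccc and /bbb match the paths in the driver above):

hdfs dfs -mkdir -p /ccc

hdfs dfs -put wordcount.txt /ccc

hdfs dfs -rm -r /bbb (only needed if /bbb is left over from a previous run)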

Step 4: Package the project
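If the project is a Maven project (assumed here, since the jar used below is named original-mapreduce-1.0-SNAPSHOT.jar), running mvn clean package in the project root builds the jars into the target/ directory.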

Step 5: Upload the two jars to your Linux machine
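For example, from your development machine (the user root and the target directory /root/ are only assumptions; adjust them for your environment):

scp target/original-mapreduce-1.0-SNAPSHOT.jar root@192.168.100.105:/root/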

Step 6: Run on the cluster

Right-click WordCountDriver in your IDE and choose Copy Reference to copy its fully qualified class name.

You can launch the job with the hadoop command on any node of the cluster:

hadoop jar original-mapreduce-1.0-SNAPSHOT.jar com.czxy.Test01.WordCountDriver (the class name after the jar is the Reference you just copied)

(A reminder: many people run into errors at this point. Start your cluster first, confirm the daemons are up with jps, and only then launch the job with the hadoop command.)
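Once the job finishes, you can check the result with hdfs dfs -cat /bbb/part-r-00000 (part-r-00000 is the usual name of the first reducer's output file).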
