需求:读取文本文件中的数字(每行一个),取出其中最大的前 3 个。
Java版本:
代码语言:javascript复制public class Top3 {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("Top3").setMaster("local");
JavaSparkContext sc= new JavaSparkContext(conf);
JavaRDD<String> lines = sc.textFile("C:/Users/zhang/Desktop/a.txt");
JavaPairRDD<Integer, String> pairs =lines.mapToPair(new PairFunction<String, Integer, String>() {
@Override
public Tuple2<Integer, String> call(String t) throws Exception {
// TODO Auto-generated method stub
return new Tuple2<Integer, String>(Integer.valueOf(t), t);
}
});
JavaPairRDD<Integer, String> sortedPairs = pairs.sortByKey(false);
JavaRDD<Integer> sortedNumbers = sortedPairs.map(new Function<Tuple2<Integer,String>, Integer>() {
@Override
public Integer call(Tuple2<Integer, String> v1) throws Exception {
// TODO Auto-generated method stub
return v1._1;
}
});
List<Integer> sortedNumberList = sortedNumbers.take(3);
for(Integer num : sortedNumberList){
System.out.println(num);
}
sc.close();
}
}
Scala版本:
/**
 * Spark job that prints the three largest integers found in a text file.
 *
 * The input file is expected to hold one integer per line; blank lines are
 * skipped and surrounding whitespace is ignored.
 */
object Top3 {

  /**
   * Reads the input file, parses each line as an integer and prints the
   * three largest values in descending order, one per line.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Top3").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val top3Number = sc
        .textFile("C:/Users/zhang/Desktop/a.txt", 1)
        .map(_.trim)
        // Skip blank lines so they do not fail the job in `toInt`.
        .filter(_.nonEmpty)
        .map(_.toInt)
        // top(3) yields the three largest elements in descending order
        // without sorting (and shuffling) the entire data set.
        .top(3)

      top3Number.foreach(num => println(num))
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }
  }
}