Code language: Java
package javasssss;

import java.nio.charset.StandardCharsets;
import java.util.Iterator;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.HiveContext;
/**
* Created by shengjk1 on 2016/8/8.
* blog address :http://blog.csdn.net/jsjsjs1789
*/
public class SparkInsertHbase {
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("SparkInsertHbase");
JavaSparkContext sc = new JavaSparkContext(conf);
HiveContext hiveContext = new HiveContext(sc.sc());
DataFrame df = hiveContext.sql("select id,name from test");
**//froeachPartition foreah 会报task not to serializer。但对mysql来说两者都ok,推荐使用foreachPartition**
df.toJavaRDD().foreachPartition(new VoidFunction<Iterator<Row>>() {
private static final long serialVersionUID = -3496935835002029475L;
@Override
public void call(Iterator<Row> rowIterator) throws Exception {
HTable table = new HTable(HBaseConfiguration.create(), "test");
/*
hbase 新api
Configuration config = HBaseConfiguration.create();
//若此处配置zk,则写错程序会卡死。可通过界面查看日志,解决!
//也可以不配,但需要classpath路径有hbase-site.xml文件
config.set("hbase.zookeeper.quorum", "centos2");
Connection conn= ConnectionFactory.createConnection(config);
Table table=conn.getTable(TableName.valueOf("test"));
*/
while (rowIterator.hasNext()) {
Row row = rowIterator.next();
String id = row.getString(0);
String name = row.getString(1);
Put put = new Put("f".getBytes());
put.addColumn("f".getBytes(), "id".getBytes(), id.getBytes());
put.addColumn("f".getBytes(), "name".getBytes(), name.getBytes());
table.put(put);
}
// String tableName = "test";
// Table table=conn.getTable(TableName.valueOf(tableName));
};
});
}
}
There are three ways in total to solve the "Task not serializable" problem; for details see:
http://stackoverflow.com/questions/25250774/writing-to-hbase-via-spark-task-not-serializable