Windows+Eclipse+Maven+HBase 1.2.4开发环境搭建

2022-05-06 17:51:35 浏览数 (1)

1. 在Linux集群下已经搭建了Zookeeper Hadoop HBase

hostname

ip

组件

node0

192.168.1.160

zookeeper,namenode,NodeManager,HMaster,HRegionServer

node1

192.168.1.161

zookeeper,datanode,NodeManager,HRegionServer

node2

192.168.1.162

zookeeper,datanode,ResourceManager,HMaster,HRegionServer

2. 在Windows下搭建HBase应用程序开发环境

2.1 安装配置JDK

下载JDK1.8,配置环境变量

2.2 安装配置Maven

下载apache-maven-3.3.9,解压缩,配置环境变量

打开CMD,输入mvn -v

可以看到maven版本信息

2.3 配置Eclipse

目前的eclipse-javee版本已经自带maven插件了 winows-preferences-左边maven可以看到安装好的maven

2.4 创建Maven项目

在Wizards中输入maven

http://mvnrepository.com/ 比如我们需要引入spring核心jar包spring-core,打开Maven Repository,搜索spring-core 选择最新版本3.2.0.RELEASE,可以看到其dependency写法如下红框所示: 将其复制到pom.xml中的中 。这样,Maven就会开始自动下载jar包到本地仓库,然后关联到你的项目中,下载完成后,我们展开工程目录中External Libraries。

代码语言:javascript复制
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>hbaseDemo</groupId>
    <artifactId>hbaseDemo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>hbaseDemo</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.4</version>
        </dependency>
        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
    </dependencies>
</project>

题外话,如果是WEB项目,还需要在pom.xml中导入 javaee-api.jar

代码语言:javascript复制
<dependency>
            <groupId>javax</groupId>
            <artifactId>javaee-api</artifactId>
            <version>7.0</version>
</dependency>

2.5 编写应用程序

代码语言:javascript复制
package hbaseDemo.dao;

import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;

public class HbaseDao {
    private static Configuration conf = HBaseConfiguration.create();
    static {
        conf.set("hbase.rootdir", "hdfs://cc/hbase");
        // 设置Zookeeper,直接设置IP地址
        conf.set("hbase.zookeeper.quorum", "192.168.1.160,192.168.1.162,192.168.1.163");
    }

    // 创建表
    public static void createTable(String tablename, String columnFamily) throws Exception {
        Connection connection = ConnectionFactory.createConnection(conf);
        Admin admin = connection.getAdmin();

        TableName tableNameObj = TableName.valueOf(tablename);

        if (admin.tableExists(tableNameObj)) {
            System.out.println("Table exists!");
            System.exit(0);
        } else {
            HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tablename));
            tableDesc.addFamily(new HColumnDescriptor(columnFamily));
            admin.createTable(tableDesc);
            System.out.println("create table success!");
        }
        admin.close();
        connection.close();
    }

    // 删除表
    public static void deleteTable(String tableName) {
        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Admin admin = connection.getAdmin();
            TableName table = TableName.valueOf(tableName);
            admin.disableTable(table);
            admin.deleteTable(table);
            System.out.println("delete table "   tableName   " ok.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    // 插入一行记录
    public static void addRecord(String tableName, String rowKey, String family, String qualifier, String value){
        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Table table = connection.getTable(TableName.valueOf(tableName));
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
            table.put(put);
            table.close();
            connection.close();
            System.out.println("insert recored "   rowKey   " to table "   tableName   " ok.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws Exception {
        HbaseDao.createTable("testTb", "info");
        HbaseDao.addRecord("testTb", "001", "info", "name", "zhangsan");
        HbaseDao.addRecord("testTb", "001", "info", "age", "20");
        //HbaseDao.deleteTable("testTb");
    }
}

运行结果

代码语言:javascript复制
[root@node0 ~]# hbase shell
2017-04-07 01:51:31,268 WARN  [main] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/lib/hbase/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/lib/hadoop/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 1.2.4, r67592f3d062743907f8c5ae00dbbe1ae4f69e5af, Tue Oct 25 18:10:20 CDT 2016

hbase(main):001:0> list
TABLE                                                                                                                                                                       
googlebook                                                                                                                                                                  
testTb                                                                                                                                                                      
2 row(s) in 0.2760 seconds

=> ["googlebook", "testTb"]
hbase(main):002:0> scan 'testTb'
ROW                                          COLUMN CELL                                                                                                                    
 001                                         column=info:age, timestamp=1491544218337, value=20                                                                             
 001                                         column=info:name, timestamp=1491544218154, value=zhangsan                                                                      
1 row(s) in 0.1980 seconds

2.6 批量导入数据

生成批量数据

代码语言:javascript复制
[root@node0 data]# vi gen.sh
[root@node data]# cat gen.sh
#!/bin/sh

for i in {1..100000};do
        echo -e $i't'$RANDOM't'$RANDOM't'$RANDOM
done;
[root@node0 data]# sh gen.sh > mydata.txt
[root@node0 data]# tail -10 mydata.txt
99991   5421    23010   14796
99992   8131    27221   11846
99993   20723   8007    14215
99994   20876   29543   5465
99995   14753   19926   20000
99996   26226   7228    25424
99997   18393   15515   13721
99998   1855    23042   27666
99999   16761   16120   24486
100000  14619   17100   556

上传到HDFS

代码语言:javascript复制
[root@node0 data]# hdfs dfs -put mydata.txt input
[root@node0 data]# hdfs dfs -ls input
Found 1 items
-rw-r--r--   3 root hbase    1698432 2017-07-19 20:38 input/mydata.txt
You have mail in /var/spool/mail/root
[root@node0 data]#

创建HBase表

代码语言:javascript复制
hbase(main):021:0> create 'mydata','info'

pom.xml

代码语言:javascript复制
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>hbaseDemo</groupId>
    <artifactId>hbaseDemo</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>hbaseDemo</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.7.1</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.1.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.1.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-protocol -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-protocol</artifactId>
            <version>1.1.2</version>
        </dependency>


        <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.8</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
    </dependencies>
</project>

编写MapReduce程序

代码语言:javascript复制
package hbaseDemo.dao;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class BatchImport {

    public static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            // super.setup( context );
            //System.out.println(key   ":"   value);
            context.write(key, value);
        };
    }

    static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable> {
        protected void reduce(LongWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text text : values) {
                final String[] splited = text.toString().split("t");
                final Put put = new Put(Bytes.toBytes(splited[0]));// 第一列行键
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("data1"), Bytes.toBytes(splited[1]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("data2"), Bytes.toBytes(splited[2]));
                put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("data3"), Bytes.toBytes(splited[3]));
                context.write(NullWritable.get(), put);
            }
        };
    }

    /**
     * 之前一直报错,failed on connection exception 拒绝连接:nb0:8020
     * 因为namenode节点不在192.168.1.160上,而在192.168.1.161和192.168.1.162
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        conf.set("hbase.rootdir", "hdfs://cetc32/hbase");
        // 设置Zookeeper,直接设置IP地址
        conf.set("hbase.zookeeper.quorum", "192.168.1.160,192.168.1.161,192.168.1.162");
        // 设置hbase表名称(先在shell下创建一个表:create 'mydata','info')
        conf.set(TableOutputFormat.OUTPUT_TABLE, "mydata");
        // 将该值改大,防止hbase超时退出
        conf.set("dfs.socket.timeout", "180000");

        //System.setProperty("HADOOP_USER_NAME", "root");
        // 设置fs.defaultFS
        conf.set("fs.defaultFS", "hdfs://192.168.1.161:8020");
        // 设置yarn.resourcemanager节点
        conf.set("yarn.resourcemanager.hostname", "nb1");

        Job job = Job.getInstance(conf);
        job.setJobName("HBaseBatchImport");
        job.setMapperClass(BatchImportMapper.class);
        job.setReducerClass(BatchImportReducer.class);
        // 设置map的输出,不设置reduce的输出类型
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // 不再设置输出路径,而是设置输出格式类型
        job.setOutputFormatClass(TableOutputFormat.class);

        FileInputFormat.setInputPaths(job, "hdfs://192.168.1.161:8020/user/root/input/mydata.txt");

        boolean flag=job.waitForCompletion(true);
        System.out.println(flag);
    }
}

Eclipse运行

代码语言:javascript复制
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
true

查看Hbase

代码语言:javascript复制
hbase(main):021:0> count 'mydata'
Current count: 1000, row: 10897                                                                                                                                             
Current count: 2000, row: 11797                                                                                                                                             
Current count: 3000, row: 12697                                                                                                                                             
Current count: 4000, row: 13597                                                                                                                                             
Current count: 5000, row: 14497                                                                                                                                             
Current count: 6000, row: 15397                                                                                                                                             
Current count: 7000, row: 16297                                                                                                                                             
Current count: 8000, row: 17197                                                                                                                                             
Current count: 9000, row: 18097                                                                                                                                             
Current count: 10000, row: 18998                                                                                                                                            
Current count: 11000, row: 19898                                                                                                                                            
Current count: 12000, row: 20797                                                                                                                                            
Current count: 13000, row: 21697                                                                                                                                            
Current count: 14000, row: 22597                                                                                                                                            
Current count: 15000, row: 23497                                                                                                                                            
Current count: 16000, row: 24397                                                                                                                                            
Current count: 17000, row: 25297                                                                                                                                            
Current count: 18000, row: 26197                                                                                                                                            
Current count: 19000, row: 27097                                                                                                                                            
Current count: 20000, row: 27998                                                                                                                                            
Current count: 21000, row: 28898                                                                                                                                            
Current count: 22000, row: 29798                                                                                                                                            
Current count: 23000, row: 30697                                                                                                                                            
Current count: 24000, row: 31597                                                                                                                                            
Current count: 25000, row: 32497                                                                                                                                            
Current count: 26000, row: 33397                                                                                                                                            
Current count: 27000, row: 34297                                                                                                                                            
Current count: 28000, row: 35197                                                                                                                                            
Current count: 29000, row: 36097                                                                                                                                            
Current count: 30000, row: 36998                                                                                                                                            
Current count: 31000, row: 37898                                                                                                                                            
Current count: 32000, row: 38798                                                                                                                                            
Current count: 33000, row: 39698                                                                                                                                            
Current count: 34000, row: 40597                                                                                                                                            
Current count: 35000, row: 41497                                                                                                                                            
Current count: 36000, row: 42397                                                                                                                                            
Current count: 37000, row: 43297                                                                                                                                            
Current count: 38000, row: 44197                                                                                                                                            
Current count: 39000, row: 45097                                                                                                                                            
Current count: 40000, row: 45998                                                                                                                                            
Current count: 41000, row: 46898                                                                                                                                            
Current count: 42000, row: 47798                                                                                                                                            
Current count: 43000, row: 48698                                                                                                                                            
Current count: 44000, row: 49598                                                                                                                                            
Current count: 45000, row: 50497                                                                                                                                            
Current count: 46000, row: 51397                                                                                                                                            
Current count: 47000, row: 52297                                                                                                                                            
Current count: 48000, row: 53197                                                                                                                                            
Current count: 49000, row: 54097                                                                                                                                            
Current count: 50000, row: 54998                                                                                                                                            
Current count: 51000, row: 55898                                                                                                                                            
Current count: 52000, row: 56798                                                                                                                                            
Current count: 53000, row: 57698                                                                                                                                            
Current count: 54000, row: 58598                                                                                                                                            
Current count: 55000, row: 59498                                                                                                                                            
Current count: 56000, row: 60397                                                                                                                                            
Current count: 57000, row: 61297                                                                                                                                            
Current count: 58000, row: 62197                                                                                                                                            
Current count: 59000, row: 63097                                                                                                                                            
Current count: 60000, row: 63998                                                                                                                                            
Current count: 61000, row: 64898                                                                                                                                            
Current count: 62000, row: 65798                                                                                                                                            
Current count: 63000, row: 66698                                                                                                                                            
Current count: 64000, row: 67598                                                                                                                                            
Current count: 65000, row: 68498                                                                                                                                            
Current count: 66000, row: 69398                                                                                                                                            
Current count: 67000, row: 70297                                                                                                                                            
Current count: 68000, row: 71197                                                                                                                                            
Current count: 69000, row: 72097                                                                                                                                            
Current count: 70000, row: 72998                                                                                                                                            
Current count: 71000, row: 73898                                                                                                                                            
Current count: 72000, row: 74798                                                                                                                                            
Current count: 73000, row: 75698                                                                                                                                            
Current count: 74000, row: 76598                                                                                                                                            
Current count: 75000, row: 77498                                                                                                                                            
Current count: 76000, row: 78398                                                                                                                                            
Current count: 77000, row: 79298                                                                                                                                            
Current count: 78000, row: 80197                                                                                                                                            
Current count: 79000, row: 81097                                                                                                                                            
Current count: 80000, row: 81998                                                                                                                                            
Current count: 81000, row: 82898                                                                                                                                            
Current count: 82000, row: 83798                                                                                                                                            
Current count: 83000, row: 84698                                                                                                                                            
Current count: 84000, row: 85598                                                                                                                                            
Current count: 85000, row: 86498                                                                                                                                            
Current count: 86000, row: 87398                                                                                                                                            
Current count: 87000, row: 88298                                                                                                                                            
Current count: 88000, row: 89198                                                                                                                                            
Current count: 89000, row: 90097                                                                                                                                            
Current count: 90000, row: 90998                                                                                                                                            
Current count: 91000, row: 91898                                                                                                                                            
Current count: 92000, row: 92798                                                                                                                                            
Current count: 93000, row: 93698                                                                                                                                            
Current count: 94000, row: 94598                                                                                                                                            
Current count: 95000, row: 95498                                                                                                                                            
Current count: 96000, row: 96398                                                                                                                                            
Current count: 97000, row: 97298                                                                                                                                            
Current count: 98000, row: 98198                                                                                                                                            
Current count: 99000, row: 99098                                                                                                                                            
Current count: 100000, row: 99999                                                                                                                                           
100000 row(s) in 7.0460 seconds

=> 100000
hbase(main):022:0>

0 人点赞