1. 在Linux集群下已经搭建了Zookeeper Hadoop HBase
hostname | ip | 组件 |
node0 | | zookeeper,namenode,NodeManager,HMaster,HRegionServer |
node1 | | zookeeper,datanode,NodeManager,HRegionServer |
node2 | | zookeeper,datanode,ResourceManager,HMaster,HRegionServer |
2. 在Windows下搭建HBase应用程序开发环境
2.1 安装配置JDK
2.2 安装配置Maven
打开CMD,输入mvn -v
2.3 配置Eclipse
目前的 Eclipse IDE for Java EE 版本已经自带 Maven 插件,在 Window → Preferences 左侧的 Maven 项中可以看到已安装的 Maven。
2.4 创建Maven项目
http://mvnrepository.com/ 比如我们需要引入spring核心jar包spring-core,打开Maven Repository,搜索spring-core,选择最新版本3.2.0.RELEASE,可以看到其dependency写法如下红框所示:将其复制到pom.xml中的<dependencies>节点中。这样,Maven就会开始自动下载jar包到本地仓库,然后关联到你的项目中,下载完成后,我们展开工程目录中的External Libraries即可看到。
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
题外话,如果是WEB项目,还需要在pom.xml中导入 javaee-api.jar
2.5 编写应用程序
package hbaseDemo.dao;
import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
public class HbaseDao {
private static Configuration conf = HBaseConfiguration.create();
static {
conf.set("hbase.rootdir", "hdfs://cc/hbase");
// 设置Zookeeper,直接设置IP地址
conf.set("hbase.zookeeper.quorum", ",,");
// 创建表
public static void createTable(String tablename, String columnFamily) throws Exception {
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
TableName tableNameObj = TableName.valueOf(tablename);
if (admin.tableExists(tableNameObj)) {
System.out.println("Table exists!");
} else {
HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tablename));
tableDesc.addFamily(new HColumnDescriptor(columnFamily));
System.out.println("create table success!");
// 删除表
public static void deleteTable(String tableName) {
try {
Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin();
TableName table = TableName.valueOf(tableName);
System.out.println("delete table " tableName " ok.");
} catch (IOException e) {
// 插入一行记录
public static void addRecord(String tableName, String rowKey, String family, String qualifier, String value){
try {
Connection connection = ConnectionFactory.createConnection(conf);
Table table = connection.getTable(TableName.valueOf(tableName));
Put put = new Put(Bytes.toBytes(rowKey));
put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
System.out.println("insert recored " rowKey " to table " tableName " ok.");
} catch (IOException e) {
public static void main(String[] args) throws Exception {
HbaseDao.createTable("testTb", "info");
HbaseDao.addRecord("testTb", "001", "info", "name", "zhangsan");
HbaseDao.addRecord("testTb", "001", "info", "age", "20");
[root@node0 ~]# hbase shell
2017-04-07 01:51:31,268 WARN [main] util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/lib/hbase/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/lib/hadoop/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 1.2.4, r67592f3d062743907f8c5ae00dbbe1ae4f69e5af, Tue Oct 25 18:10:20 CDT 2016
hbase(main):001:0> list
2 row(s) in 0.2760 seconds
=> ["googlebook", "testTb"]
hbase(main):002:0> scan 'testTb'
001 column=info:age, timestamp=1491544218337, value=20
001 column=info:name, timestamp=1491544218154, value=zhangsan
1 row(s) in 0.1980 seconds
2.6 批量导入数据
[root@node0 data]# vi gen.sh
[root@node0 data]# cat gen.sh
for i in {1..100000};do
echo -e $i'\t'$RANDOM'\t'$RANDOM'\t'$RANDOM
done
[root@node0 data]# sh gen.sh > mydata.txt
[root@node0 data]# tail -10 mydata.txt
99991 5421 23010 14796
99992 8131 27221 11846
99993 20723 8007 14215
99994 20876 29543 5465
99995 14753 19926 20000
99996 26226 7228 25424
99997 18393 15515 13721
99998 1855 23042 27666
99999 16761 16120 24486
100000 14619 17100 556
[root@node0 data]# hdfs dfs -put mydata.txt input
[root@node0 data]# hdfs dfs -ls input
Found 1 items
-rw-r--r-- 3 root hbase 1698432 2017-07-19 20:38 input/mydata.txt
You have mail in /var/spool/mail/root
[root@node0 data]#
hbase(main):021:0> create 'mydata','info'
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-protocol -->
package hbaseDemo.dao;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
public class BatchImport {
public static class BatchImportMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
protected void map(LongWritable key, Text value, Context context)
throws java.io.IOException, InterruptedException {
// super.setup( context );
//System.out.println(key ":" value);
context.write(key, value);
static class BatchImportReducer extends TableReducer<LongWritable, Text, NullWritable> {
protected void reduce(LongWritable key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
for (Text text : values) {
final String[] splited = text.toString().split("t");
final Put put = new Put(Bytes.toBytes(splited[0]));// 第一列行键
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("data1"), Bytes.toBytes(splited[1]));
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("data2"), Bytes.toBytes(splited[2]));
put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("data3"), Bytes.toBytes(splited[3]));
context.write(NullWritable.get(), put);
* 之前一直报错,failed on connection exception 拒绝连接:nb0:8020
* 因为namenode节点不在192.168.1.160上,而在192.168.1.161和192.168.1.162
* @param args
* @throws Exception
public static void main(String[] args) throws Exception {
final Configuration conf = new Configuration();
conf.set("hbase.rootdir", "hdfs://cetc32/hbase");
// 设置Zookeeper,直接设置IP地址
conf.set("hbase.zookeeper.quorum", ",,");
// 设置hbase表名称(先在shell下创建一个表:create 'mydata','info')
conf.set(TableOutputFormat.OUTPUT_TABLE, "mydata");
// 将该值改大,防止hbase超时退出
conf.set("dfs.socket.timeout", "180000");
//System.setProperty("HADOOP_USER_NAME", "root");
// 设置fs.defaultFS
conf.set("fs.defaultFS", "hdfs://");
// 设置yarn.resourcemanager节点
conf.set("yarn.resourcemanager.hostname", "nb1");
Job job = Job.getInstance(conf);
// 设置map的输出,不设置reduce的输出类型
// 不再设置输出路径,而是设置输出格式类型
FileInputFormat.setInputPaths(job, "hdfs://");
boolean flag=job.waitForCompletion(true);
log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
hbase(main):021:0> count 'mydata'
