前言
Logstash 是一个开源的数据收集、加工和传输软件
常与 Elasticsearch 和 Kibana 一起组成 ELK 技术栈,给日志分析带来极大的便利
这里分享一下使用 Logstash 将 CSV 导入 Elasticsearch 的方法
可参考官方文档中 csv 和 date 这两个 filter 插件的说明
Tip: 当前使用的版本为 6.1.3
操作
系统环境
[root@much ~]# hostnamectl
Static hostname: much
Icon name: computer-vm
Chassis: vm
Machine ID: 33dc28f7e76c4903ad9b603b77e29a7c
Boot ID: 71a5a14bde634bfc8c5bafb7d9442f9e
Virtualization: kvm
Operating System: CentOS Linux 7 (Core)
CPE OS Name: cpe:/o:centos:centos:7
Kernel: Linux 3.10.0-514.21.1.el7.x86_64
Architecture: x86-64
[root@much ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN qlen 1
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 08:00:27:d1:5d:f7 brd ff:ff:ff:ff:ff:ff
inet 10.0.2.15/24 brd 10.0.2.255 scope global dynamic enp0s3
valid_lft 85051sec preferred_lft 85051sec
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
link/ether 08:00:27:47:20:56 brd ff:ff:ff:ff:ff:ff
inet 192.168.56.208/24 brd 192.168.56.255 scope global enp0s8
valid_lft forever preferred_lft forever
4: virbr0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc noqueue state DOWN qlen 1000
link/ether 52:54:00:16:5e:11 brd ff:ff:ff:ff:ff:ff
inet 192.168.122.1/24 brd 192.168.122.255 scope global virbr0
valid_lft forever preferred_lft forever
5: virbr0-nic: <BROADCAST,MULTICAST> mtu 1500 qdisc pfifo_fast master virbr0 state DOWN qlen 1000
link/ether 52:54:00:16:5e:11 brd ff:ff:ff:ff:ff:ff
[root@much ~]#
准备 CSV 文件
[root@much es]# for i in {10..30} ; do echo `date "+%Y/01/$i %H:%M:%S"`",$i-1,$i 1,$i 5"; done > test.csv
[root@much es]# vim test.csv
[root@much es]# cat test.csv
a,b,c,d
2018/01/10 23:54:53,10-1,10 1,10 5
2018/01/11 23:54:53,11-1,11 1,11 5
2018/01/12 23:54:53,12-1,12 1,12 5
2018/01/13 23:54:53,13-1,13 1,13 5
2018/01/14 23:54:53,14-1,14 1,14 5
2018/01/15 23:54:53,15-1,15 1,15 5
2018/01/16 23:54:53,16-1,16 1,16 5
2018/01/17 23:54:53,17-1,17 1,17 5
2018/01/18 23:54:53,18-1,18 1,18 5
2018/01/19 23:54:53,19-1,19 1,19 5
2018/01/20 23:54:53,20-1,20 1,20 5
2018/01/21 23:54:53,21-1,21 1,21 5
2018/01/22 23:54:53,22-1,22 1,22 5
2018/01/23 23:54:53,23-1,23 1,23 5
2018/01/24 23:54:53,24-1,24 1,24 5
2018/01/25 23:54:53,25-1,25 1,25 5
2018/01/26 23:54:53,26-1,26 1,26 5
2018/01/27 23:54:53,27-1,27 1,27 5
2018/01/28 23:54:53,28-1,28 1,28 5
2018/01/29 23:54:53,29-1,29 1,29 5
2018/01/30 23:54:53,30-1,30 1,30 5
[root@much es]#
这个文件只有四列 a,b,c,d(首行表头是在上面的 vim 步骤中手动添加的)
其中 a 列包含了时间戳
准备配置文件
[root@much es]# vim test.conf
[root@much es]# cat test.conf
input {
file {
path => "/root/es/test.csv"
start_position => "beginning"
}
}
filter {
csv {
separator => ","
columns => ["a","b","c","d"]
}
date{
match => [ "a", "yyyy/MM/dd HH:mm:ss" ]
}
}
output {
elasticsearch {
hosts => "http://localhost:9200"
index => "abcdjustfortest"
document_type => "csv"
}
stdout {codec => rubydebug}
}
[root@much es]#
这里有几个处理点
- 使用 file 的 input 插件指定文件位置和开始位置
- 使用 csv 的 filter 插件指明分隔符和列名
- 使用 date 的 filter 插件指明时间戳的格式,将 a 列中的数据按该格式解析后作为此条事件的时间戳(@timestamp)
- 使用 elasticsearch 的 output 插件指明 es 的地址、索引名称和文档类型
- 同时以 rubydebug 的方式在 console 终端中打印出解析过后的数据
指定配置运行
[root@much es]# /usr/share/logstash/bin/logstash -f test.conf
WARNING: Could not find logstash.yml which is typically located in $LS_HOME/config or /etc/logstash. You can specify the path using --path.settings. Continuing using the defaults
Could not find log4j2 configuration at path /usr/share/logstash/config/log4j2.properties. Using default config which logs errors to the console
{
"@version" => "1",
"c" => "c",
"a" => "a",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "a,b,c,d",
"d" => "d",
"b" => "b",
"tags" => [
[0] "_dateparsefailure"
],
"@timestamp" => 2018-02-02T15:58:05.969Z
}
{
"@version" => "1",
"c" => "10 1",
"a" => "2018/01/10 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/10 23:54:53,10-1,10 1,10 5",
"d" => "10 5",
"b" => "10-1",
"@timestamp" => 2018-01-10T15:54:53.000Z
}
{
"@version" => "1",
"c" => "11 1",
"a" => "2018/01/11 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/11 23:54:53,11-1,11 1,11 5",
"d" => "11 5",
"b" => "11-1",
"@timestamp" => 2018-01-11T15:54:53.000Z
}
{
"@version" => "1",
"c" => "12 1",
"a" => "2018/01/12 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/12 23:54:53,12-1,12 1,12 5",
"d" => "12 5",
"b" => "12-1",
"@timestamp" => 2018-01-12T15:54:53.000Z
}
{
"@version" => "1",
"c" => "13 1",
"a" => "2018/01/13 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/13 23:54:53,13-1,13 1,13 5",
"d" => "13 5",
"b" => "13-1",
"@timestamp" => 2018-01-13T15:54:53.000Z
}
{
"@version" => "1",
"c" => "14 1",
"a" => "2018/01/14 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/14 23:54:53,14-1,14 1,14 5",
"d" => "14 5",
"b" => "14-1",
"@timestamp" => 2018-01-14T15:54:53.000Z
}
{
"@version" => "1",
"c" => "15 1",
"a" => "2018/01/15 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/15 23:54:53,15-1,15 1,15 5",
"d" => "15 5",
"b" => "15-1",
"@timestamp" => 2018-01-15T15:54:53.000Z
}
{
"@version" => "1",
"c" => "16 1",
"a" => "2018/01/16 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/16 23:54:53,16-1,16 1,16 5",
"d" => "16 5",
"b" => "16-1",
"@timestamp" => 2018-01-16T15:54:53.000Z
}
{
"@version" => "1",
"c" => "17 1",
"a" => "2018/01/17 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/17 23:54:53,17-1,17 1,17 5",
"d" => "17 5",
"b" => "17-1",
"@timestamp" => 2018-01-17T15:54:53.000Z
}
{
"@version" => "1",
"c" => "18 1",
"a" => "2018/01/18 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/18 23:54:53,18-1,18 1,18 5",
"d" => "18 5",
"b" => "18-1",
"@timestamp" => 2018-01-18T15:54:53.000Z
}
{
"@version" => "1",
"c" => "19 1",
"a" => "2018/01/19 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/19 23:54:53,19-1,19 1,19 5",
"d" => "19 5",
"b" => "19-1",
"@timestamp" => 2018-01-19T15:54:53.000Z
}
{
"@version" => "1",
"c" => "20 1",
"a" => "2018/01/20 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/20 23:54:53,20-1,20 1,20 5",
"d" => "20 5",
"b" => "20-1",
"@timestamp" => 2018-01-20T15:54:53.000Z
}
{
"@version" => "1",
"c" => "21 1",
"a" => "2018/01/21 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/21 23:54:53,21-1,21 1,21 5",
"d" => "21 5",
"b" => "21-1",
"@timestamp" => 2018-01-21T15:54:53.000Z
}
{
"@version" => "1",
"c" => "22 1",
"a" => "2018/01/22 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/22 23:54:53,22-1,22 1,22 5",
"d" => "22 5",
"b" => "22-1",
"@timestamp" => 2018-01-22T15:54:53.000Z
}
{
"@version" => "1",
"c" => "23 1",
"a" => "2018/01/23 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/23 23:54:53,23-1,23 1,23 5",
"d" => "23 5",
"b" => "23-1",
"@timestamp" => 2018-01-23T15:54:53.000Z
}
{
"@version" => "1",
"c" => "24 1",
"a" => "2018/01/24 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/24 23:54:53,24-1,24 1,24 5",
"d" => "24 5",
"b" => "24-1",
"@timestamp" => 2018-01-24T15:54:53.000Z
}
{
"@version" => "1",
"c" => "25 1",
"a" => "2018/01/25 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/25 23:54:53,25-1,25 1,25 5",
"d" => "25 5",
"b" => "25-1",
"@timestamp" => 2018-01-25T15:54:53.000Z
}
{
"@version" => "1",
"c" => "26 1",
"a" => "2018/01/26 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/26 23:54:53,26-1,26 1,26 5",
"d" => "26 5",
"b" => "26-1",
"@timestamp" => 2018-01-26T15:54:53.000Z
}
{
"@version" => "1",
"c" => "27 1",
"a" => "2018/01/27 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/27 23:54:53,27-1,27 1,27 5",
"d" => "27 5",
"b" => "27-1",
"@timestamp" => 2018-01-27T15:54:53.000Z
}
{
"@version" => "1",
"c" => "28 1",
"a" => "2018/01/28 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/28 23:54:53,28-1,28 1,28 5",
"d" => "28 5",
"b" => "28-1",
"@timestamp" => 2018-01-28T15:54:53.000Z
}
{
"@version" => "1",
"c" => "29 1",
"a" => "2018/01/29 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/29 23:54:53,29-1,29 1,29 5",
"d" => "29 5",
"b" => "29-1",
"@timestamp" => 2018-01-29T15:54:53.000Z
}
{
"@version" => "1",
"c" => "30 1",
"a" => "2018/01/30 23:54:53",
"host" => "much",
"path" => "/root/es/test.csv",
"message" => "2018/01/30 23:54:53,30-1,30 1,30 5",
"d" => "30 5",
"b" => "30-1",
"@timestamp" => 2018-01-30T15:54:53.000Z
}
...
...
...
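符合预期:各数据行的 @timestamp 均取自 a 列(输出以 UTC 显示,所以比 a 列中的本地时间早 8 小时);只有首行表头 a,b,c,d 无法解析为日期,被打上了 _dateparsefailure 标签,其 @timestamp 保留为读取时的时间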
可以再去 kibana 里看看数据内容
选择索引
选择时间字段