转化日期
加入解析日期的类 ParseFrenchDate,并定义处理逻辑
[root@h102 kiba]# vim common.rb
[root@h102 kiba]# cat common.rb
require 'csv'
# Kiba source that streams the rows of a CSV file, one Hash per row.
class CsvSource
  # @param file [String] path of the CSV file to read
  # @param options [Hash] parsing options forwarded to CSV.foreach
  #   (for example col_sep:, headers:, header_converters:)
  def initialize(file, options = {})
    @file = file
    @options = options
  end

  # Yields every row of the file converted with #to_hash, so each row must
  # respond to #to_hash (CSV yields such rows when headers are enabled).
  #
  # @yieldparam row [Hash] one CSV row
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    # Splat the options as keywords: on Ruby 3+ a positional Hash would be
    # taken for CSV.foreach's `mode` argument instead of the parsing options.
    CSV.foreach(@file, **@options) do |row|
      yield row.to_hash
    end
  end
end
require 'awesome_print'
# Registers a transform that pretty-prints each row with awesome_print's `ap`
# and passes the row on unchanged.
def show_me
  # `tap` hands the row back after printing, which keeps it in the pipeline.
  transform { |row| row.tap { |current| ap current } }
end
# Transform that converts a comma-decimal string (e.g. "10,96") into a Float.
class ParseFrenchFloat
  # @param from [Object] key of the row entry holding the raw string
  # @param to [Object] key under which the parsed Float is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Replaces every comma with a dot, parses the result with Kernel#Float and
  # stores it under the target key; the source entry is left in place.
  #
  # @param row [Hash] row being processed (mutated in place)
  # @return [Hash] the same row
  # @raise [ArgumentError] if the normalized string is not a valid float
  def process(row)
    raw = row[@from]
    normalized = raw.gsub(',', '.')
    row.tap { |current| current[@to] = Float(normalized) }
  end
end
# Transform that parses a day/month/year string (e.g. "7/3/2015") and stores
# the resulting date as a string.
class ParseFrenchDate
  # @param from [Object] key of the row entry holding the raw date string
  # @param to [Object] key under which the reformatted date is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Parses the source value with the '%d/%m/%Y' pattern and writes Date#to_s
  # of the result under the target key; the source entry is left in place.
  #
  # @param row [Hash] row being processed (mutated in place)
  # @return [Hash] the same row
  def process(row)
    parsed = Date.strptime(row[@from], '%d/%m/%Y')
    row.tap { |current| current[@to] = parsed.to_s }
  end
end
[root@h102 kiba]# vim convert-csv.etl
[root@h102 kiba]# cat convert-csv.etl
require_relative 'common'
# Source: read commandes.csv (semicolon-separated), using the header line
# as symbolized keys for each row.
source(
  CsvSource,
  'commandes.csv',
  col_sep: ';',
  headers: true,
  header_converters: :symbol
)

# Parse the comma-decimal amount into a Float stored under :amount_eur.
transform(ParseFrenchFloat, from: :montant_eur, to: :amount_eur)

# Re-express the day/month/year invoice date as a string under :invoice_date.
transform(ParseFrenchDate, from: :date_facture, to: :invoice_date)

# Print every row so the outcome of the pipeline can be inspected.
show_me
[root@h102 kiba]# bundle exec kiba convert-csv.etl
{
:date_facture => "7/3/2015",
:montant_eur => "10,96",
:numero_commande => "FA1986",
:amount_eur => 10.96,
:invoice_date => "2015-03-07"
}
{
:date_facture => "7/3/2015",
:montant_eur => "85,11",
:numero_commande => "FA1987",
:amount_eur => 85.11,
:invoice_date => "2015-03-07"
}
{
:date_facture => "8/3/2015",
:montant_eur => "6,41",
:numero_commande => "FA1988",
:amount_eur => 6.41,
:invoice_date => "2015-03-08"
}
[root@h102 kiba]#
其中最主要的就是 row[@to] = Date.strptime(row[@from], '%d/%m/%Y').to_s
它的意思是:取出 from 字段(即 Key)对应的值,按 '%d/%m/%Y' 模式解析成日期,再转换为字符串,
然后赋给 to 字段;to 是一个新字段,相当于在 row 这个 hash 中新增了一个 KV 对
运行的结果正如预期
对列进行重命名
加入对列进行重命名的类 RenameField ,并定义处理逻辑
[root@h102 kiba]# vim common.rb
[root@h102 kiba]# cat common.rb
require 'csv'
# Kiba source that streams the rows of a CSV file, one Hash per row.
class CsvSource
  # @param file [String] path of the CSV file to read
  # @param options [Hash] parsing options forwarded to CSV.foreach
  #   (for example col_sep:, headers:, header_converters:)
  def initialize(file, options = {})
    @file = file
    @options = options
  end

  # Yields every row of the file converted with #to_hash, so each row must
  # respond to #to_hash (CSV yields such rows when headers are enabled).
  #
  # @yieldparam row [Hash] one CSV row
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    # Splat the options as keywords: on Ruby 3+ a positional Hash would be
    # taken for CSV.foreach's `mode` argument instead of the parsing options.
    CSV.foreach(@file, **@options) do |row|
      yield row.to_hash
    end
  end
end
require 'awesome_print'
# Registers a transform that pretty-prints each row with awesome_print's `ap`
# and passes the row on unchanged.
def show_me
  # `tap` hands the row back after printing, which keeps it in the pipeline.
  transform { |row| row.tap { |current| ap current } }
end
# Transform that converts a comma-decimal string (e.g. "10,96") into a Float.
class ParseFrenchFloat
  # @param from [Object] key of the row entry holding the raw string
  # @param to [Object] key under which the parsed Float is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Replaces every comma with a dot, parses the result with Kernel#Float and
  # stores it under the target key; the source entry is left in place.
  #
  # @param row [Hash] row being processed (mutated in place)
  # @return [Hash] the same row
  # @raise [ArgumentError] if the normalized string is not a valid float
  def process(row)
    raw = row[@from]
    normalized = raw.gsub(',', '.')
    row.tap { |current| current[@to] = Float(normalized) }
  end
end
# Transform that parses a day/month/year string (e.g. "7/3/2015") and stores
# the resulting date as a string.
class ParseFrenchDate
  # @param from [Object] key of the row entry holding the raw date string
  # @param to [Object] key under which the reformatted date is stored
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Parses the source value with the '%d/%m/%Y' pattern and writes Date#to_s
  # of the result under the target key; the source entry is left in place.
  #
  # @param row [Hash] row being processed (mutated in place)
  # @return [Hash] the same row
  def process(row)
    parsed = Date.strptime(row[@from], '%d/%m/%Y')
    row.tap { |current| current[@to] = parsed.to_s }
  end
end
# Transform that moves a row entry from one key to another.
class RenameField
  # @param from [Object] key to remove from the row
  # @param to [Object] key that receives the removed value
  def initialize(from:, to:)
    @from, @to = from, to
  end

  # Deletes the source key and stores whatever it held under the target key.
  # When the source key is absent the target is set to the result of
  # `row.delete`, which is nil for a plain Hash.
  #
  # @param row [Hash] row being processed (mutated in place)
  # @return [Hash] the same row
  def process(row)
    moved = row.delete(@from)
    row[@to] = moved
    row
  end
end
[root@h102 kiba]# vim convert-csv.etl
[root@h102 kiba]# cat convert-csv.etl
require_relative 'common'
# Source: read commandes.csv (semicolon-separated), using the header line
# as symbolized keys for each row.
source(
  CsvSource,
  'commandes.csv',
  col_sep: ';',
  headers: true,
  header_converters: :symbol
)

# Parse the comma-decimal amount into a Float stored under :amount_eur.
transform(ParseFrenchFloat, from: :montant_eur, to: :amount_eur)

# Re-express the day/month/year invoice date as a string under :invoice_date.
transform(ParseFrenchDate, from: :date_facture, to: :invoice_date)

# Move the order number from :numero_commande to :invoice_number.
transform(RenameField, from: :numero_commande, to: :invoice_number)

# Print every row so the outcome of the pipeline can be inspected.
show_me
[root@h102 kiba]# bundle exec kiba convert-csv.etl
{
:date_facture => "7/3/2015",
:montant_eur => "10,96",
:amount_eur => 10.96,
:invoice_date => "2015-03-07",
:invoice_number => "FA1986"
}
{
:date_facture => "7/3/2015",
:montant_eur => "85,11",
:amount_eur => 85.11,
:invoice_date => "2015-03-07",
:invoice_number => "FA1987"
}
{
:date_facture => "8/3/2015",
:montant_eur => "6,41",
:amount_eur => 6.41,
:invoice_date => "2015-03-08",
:invoice_number => "FA1988"
}
[root@h102 kiba]#