ETL (Extract-Transform-Load) with Kiba(3)

2021-10-18 11:57:41 浏览数 (1)

转化日期

加入解析日期的类 ParseFrenchDate,并定义处理逻辑

代码语言:javascript复制
[root@h102 kiba]# vim common.rb 
[root@h102 kiba]# cat common.rb 
require 'csv'

# Kiba source that streams the rows of a CSV file, one hash per row.
#
# Usage in an ETL script:
#   source CsvSource, 'commandes.csv', col_sep: ';', headers: true
class CsvSource
  # @param file [String] path of the CSV file to read
  # @param options [Hash] parsing options forwarded to CSV.foreach
  #   (for example col_sep:, headers:, header_converters:)
  def initialize(file, options = {})
    @file = file
    @options = options
  end

  # Reads the file and yields every row converted with #to_hash.
  #
  # @yieldparam row [Hash] one CSV row
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    # The options must be splatted as keywords: on Ruby 3+ a positional hash
    # is bound to the `mode` parameter of CSV.foreach instead of being used
    # as parsing options.
    CSV.foreach(@file, **@options) do |row|
      yield row.to_hash
    end
  end
end


require 'awesome_print'

# Debugging helper for ETL scripts: registers a transform that pretty-prints
# each row with `ap` and hands the row back unchanged, so it stays in the
# pipeline.
def show_me
  transform { |record| record.tap { |r| ap(r) } }
end


# Kiba transform that reads a decimal written with a comma as decimal
# separator (e.g. "10,96") and stores it as a Float under another key.
class ParseFrenchFloat
  # @param from [Object] key holding the raw string in the row
  # @param to [Object] key under which the parsed Float is written
  def initialize(from:, to:)
    @from = from
    @to = to
  end

  # Adds the parsed number to the row (the row is modified in place).
  # Float() raises ArgumentError when the text is not a valid number.
  #
  # @param row [Hash] row being processed
  # @return [Hash] the same row
  def process(row)
    normalized = row[@from].tr(',', '.')
    row[@to] = Float(normalized)
    row
  end
end


# Kiba transform that parses a day/month/year date string (e.g. "7/3/2015")
# and stores its Date#to_s form (e.g. "2015-03-07") under another key.
class ParseFrenchDate
  # @param from [Object] key holding the raw date string in the row
  # @param to [Object] key under which the reformatted date is written
  def initialize(from:, to:)
    @from = from
    @to = to
  end

  # Adds the reformatted date to the row (the row is modified in place).
  #
  # @param row [Hash] row being processed
  # @return [Hash] the same row
  def process(row)
    parsed = Date.strptime(row[@from], '%d/%m/%Y')
    row[@to] = parsed.to_s
    row
  end
end
[root@h102 kiba]# vim convert-csv.etl 
[root@h102 kiba]# cat convert-csv.etl 
require_relative 'common'

# Source: the semicolon-separated order file, header names turned into symbols
source CsvSource, 'commandes.csv',
       col_sep: ';',
       headers: true,
       header_converters: :symbol

# Turn the French-formatted amount into a Float
transform ParseFrenchFloat, from: :montant_eur, to: :amount_eur

# Turn the day/month/year invoice date into a year-month-day string
transform ParseFrenchDate, from: :date_facture, to: :invoice_date

# Print each row so the result can be inspected
show_me
[root@h102 kiba]# bundle exec kiba convert-csv.etl
{
       :date_facture => "7/3/2015",
        :montant_eur => "10,96",
    :numero_commande => "FA1986",
         :amount_eur => 10.96,
       :invoice_date => "2015-03-07"
}
{
       :date_facture => "7/3/2015",
        :montant_eur => "85,11",
    :numero_commande => "FA1987",
         :amount_eur => 85.11,
       :invoice_date => "2015-03-07"
}
{
       :date_facture => "8/3/2015",
        :montant_eur => "6,41",
    :numero_commande => "FA1988",
         :amount_eur => 6.41,
       :invoice_date => "2015-03-08"
}
[root@h102 kiba]# 

其中最主要的就是 row[@to] = Date.strptime(row[@from], '%d/%m/%Y').to_s

它的意思是:取出 from 字段(即 Key)对应的值,按 '%d/%m/%Y' 模式解析成日期,再转换为字符串(例如 "2015-03-07"),然后赋给 to 字段;to 是一个新字段,相当于在 row hash 中添加了一个新的 KV 对

运行的结果正如预期


对列进行重命名

加入对列进行重命名的类 RenameField ,并定义处理逻辑

代码语言:javascript复制
[root@h102 kiba]# vim common.rb 
[root@h102 kiba]# cat common.rb 
require 'csv'

# Kiba source that streams the rows of a CSV file, one hash per row.
#
# Usage in an ETL script:
#   source CsvSource, 'commandes.csv', col_sep: ';', headers: true
class CsvSource
  # @param file [String] path of the CSV file to read
  # @param options [Hash] parsing options forwarded to CSV.foreach
  #   (for example col_sep:, headers:, header_converters:)
  def initialize(file, options = {})
    @file = file
    @options = options
  end

  # Reads the file and yields every row converted with #to_hash.
  #
  # @yieldparam row [Hash] one CSV row
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    # The options must be splatted as keywords: on Ruby 3+ a positional hash
    # is bound to the `mode` parameter of CSV.foreach instead of being used
    # as parsing options.
    CSV.foreach(@file, **@options) do |row|
      yield row.to_hash
    end
  end
end


require 'awesome_print'

# Debugging helper for ETL scripts: registers a transform that pretty-prints
# each row with `ap` and hands the row back unchanged, so it stays in the
# pipeline.
def show_me
  transform { |record| record.tap { |r| ap(r) } }
end


# Kiba transform that reads a decimal written with a comma as decimal
# separator (e.g. "10,96") and stores it as a Float under another key.
class ParseFrenchFloat
  # @param from [Object] key holding the raw string in the row
  # @param to [Object] key under which the parsed Float is written
  def initialize(from:, to:)
    @from = from
    @to = to
  end

  # Adds the parsed number to the row (the row is modified in place).
  # Float() raises ArgumentError when the text is not a valid number.
  #
  # @param row [Hash] row being processed
  # @return [Hash] the same row
  def process(row)
    normalized = row[@from].tr(',', '.')
    row[@to] = Float(normalized)
    row
  end
end


# Kiba transform that parses a day/month/year date string (e.g. "7/3/2015")
# and stores its Date#to_s form (e.g. "2015-03-07") under another key.
class ParseFrenchDate
  # @param from [Object] key holding the raw date string in the row
  # @param to [Object] key under which the reformatted date is written
  def initialize(from:, to:)
    @from = from
    @to = to
  end

  # Adds the reformatted date to the row (the row is modified in place).
  #
  # @param row [Hash] row being processed
  # @return [Hash] the same row
  def process(row)
    parsed = Date.strptime(row[@from], '%d/%m/%Y')
    row[@to] = parsed.to_s
    row
  end
end


# Kiba transform that moves a value from one key of the row to another,
# removing the original key.
class RenameField
  # @param from [Object] key to remove from the row
  # @param to [Object] key under which the removed value is stored
  def initialize(from:, to:)
    @from = from
    @to = to
  end

  # Renames the field in place.
  #
  # @param row [Hash] row being processed
  # @return [Hash] the same row
  def process(row)
    # Remove the old key first, then store its value under the new key.
    value = row.delete(@from)
    row[@to] = value
    row
  end
end
[root@h102 kiba]# vim convert-csv.etl 
[root@h102 kiba]# cat convert-csv.etl 
require_relative 'common'

# Source: the semicolon-separated order file, header names turned into symbols
source CsvSource, 'commandes.csv',
       col_sep: ';',
       headers: true,
       header_converters: :symbol

# Turn the French-formatted amount into a Float
transform ParseFrenchFloat, from: :montant_eur, to: :amount_eur

# Turn the day/month/year invoice date into a year-month-day string
transform ParseFrenchDate, from: :date_facture, to: :invoice_date

# Give the order-number column its English name
transform RenameField, from: :numero_commande, to: :invoice_number

# Print each row so the result can be inspected
show_me
[root@h102 kiba]# bundle exec kiba convert-csv.etl
{
      :date_facture => "7/3/2015",
       :montant_eur => "10,96",
        :amount_eur => 10.96,
      :invoice_date => "2015-03-07",
    :invoice_number => "FA1986"
}
{
      :date_facture => "7/3/2015",
       :montant_eur => "85,11",
        :amount_eur => 85.11,
      :invoice_date => "2015-03-07",
    :invoice_number => "FA1987"
}
{
      :date_facture => "8/3/2015",
       :montant_eur => "6,41",
        :amount_eur => 6.41,
      :invoice_date => "2015-03-08",
    :invoice_number => "FA1988"
}
[root@h102 kiba]# 

0 人点赞