从2GB的excel数据中抽取用户的email

2021-09-24 14:29:17 浏览数 (1)

需要从 2GB 的 Excel 数据中抽取用户的 email。我不会 C，用 Java 写起来又太麻烦，于是用 Ruby 实现了一下。在此分享成果，希望遇到同样问题的朋友不必再撞墙。

代码语言：ruby
# Sets the legacy $KCODE global to 'u' (UTF-8 mode in Ruby 1.8-era scripts).
# NOTE(review): newer Ruby versions reportedly ignore this assignment and only
# emit a warning -- confirm the target Ruby version before relying on it.
$KCODE = 'u'
# Find provides the recursive directory walk used by Auto_fix_mail_pros#runexcel.
require 'find'
# Scans a directory tree for legacy Excel workbooks (.xls / .XLS) and, for
# every worksheet that contains e-mail addresses, writes the matching cell
# values (one per line) to "<workbook name>_<sheet number>.txt" next to the
# workbook.
#
# Excel is driven through COM automation (WIN32OLE), so #runexcel only works
# on Windows with Microsoft Excel installed.
class Auto_fix_mail_pros
  # Root scanned by #runexcel when no directory is given. File.dirname drops
  # the last path component, so this evaluates to "D:/excelrun".
  DEFAULT_ROOT = File.dirname('D:/excelrun/temp')

  # Marker embedded in the names of workbooks that were already processed;
  # files whose name contains it are skipped.
  PROCESSED_MARKER = '@$$.'

  # Accepts exactly the two spellings the script has always handled.
  EXCEL_FILE_PATTERN = /\.(?:xls|XLS)\z/

  # Unanchored on purpose: a cell qualifies when it *contains* an address,
  # and the whole cell text is exported.
  EMAIL_PATTERN = /\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/

  # Processes every unprocessed workbook found below +root+.
  #
  # root - directory to scan recursively (defaults to DEFAULT_ROOT).
  #
  # Returns nil. Raises whatever Find, WIN32OLE or File raise (e.g. a missing
  # root directory or Excel not being installed).
  def runexcel(root = DEFAULT_ROOT)
    # win32ole exists only on Windows, so it is loaded lazily to keep the
    # class loadable elsewhere.
    require 'win32ole'
    require 'find'
    require 'pathname'

    excel_files(root).each { |path| process_workbook(path) }
    nil
  end

  # Progress hook, called with the workbook path, each output file path and
  # finally the elapsed time (as a String). Intentionally a no-op here.
  def loadImage(file)
  end

  private

  # Returns the paths of all regular files below +root+ that look like
  # unprocessed Excel workbooks.
  def excel_files(root)
    found = []
    Find.find(root) do |path|
      # File.file? keeps directories that happen to be named "*.xls" out.
      found << path if File.file?(path) && excel_file?(path)
    end
    found
  end

  # True when the file name ends in .xls/.XLS and does not carry the
  # "already processed" marker.
  def excel_file?(path)
    name = File.basename(path)
    !name.include?(PROCESSED_MARKER) && !(name =~ EXCEL_FILE_PATTERN).nil?
  end

  # Opens one workbook in its own invisible Excel instance, exports the
  # e-mail cells of each worksheet and always shuts Excel down afterwards.
  def process_workbook(path)
    excel = WIN32OLE.new('excel.Application')
    workbook = nil
    begin
      excel.visible = false
      excel.DisplayAlerts = false
      workbook = excel.Workbooks.Open(path)
      # Count worksheets (not Sheets): chart sheets are part of Sheets but
      # cannot be addressed through Worksheets(n).
      sheet_count = workbook.Worksheets.Count
      loadImage(path)
      directory = Pathname.new(File.dirname(path)).realpath
      started = Time.now

      1.upto(sheet_count) do |sheet_number|
        # One bulk read per sheet instead of one COM call per cell; it also
        # covers used ranges that do not start at A1.
        emails = extract_emails(workbook.Worksheets(sheet_number).UsedRange.Value)
        next if emails.empty?

        write_emails(directory + "#{File.basename(path, '.*')}_#{sheet_number}.txt", emails)
      end

      loadImage((Time.now - started).to_s)
    ensure
      # Close without saving and quit exactly once, even after an error, so
      # no hidden Excel process is left behind.
      workbook.Close(false) if workbook
      excel.Quit
    end
  end

  # Returns, in row-major order, the text of every cell that contains an
  # e-mail address.
  #
  # values - what Range.Value handed back: nested rows for a multi-cell
  #          range, a bare scalar for a single cell, nil for an empty sheet.
  def extract_emails(values)
    cells = values.is_a?(Array) ? values.flatten : [values]
    cells.map { |cell| cell.to_s }.grep(EMAIL_PATTERN)
  end

  # Writes +emails+ to +target+, one per line.
  def write_emails(target, emails)
    # The global is still assigned because earlier versions exposed the
    # current output path this way.
    $Sfilename = target
    loadImage(target)
    File.open(target.to_s, 'w') { |out| out.puts(emails) }
  end
end

# Script entry point: build the extractor and run a single pass over the
# default directory.
Auto_fix_mail_pros.new.runexcel

0 人点赞