bulk_extract_worker.rb 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. require "tika/app"
  # Sidekiq job that back-fills the +page_text+ attribute of a user's leads.
  #
  # For every source URL it looks up the SocialLead, PhoneLead and EmailLead
  # rows belonging to the user for that URL, extracts the page text once via
  # Tika::Resource, and writes that text to each matching row.
  #
  # Progress is published through the +total+ / +at+ helpers (presumably the
  # ones mixed in by Sidekiq::Status::Worker) and is measured in source URLs.
  class BulkExtractWorker
    include Sidekiq::Worker
    include Sidekiq::Status::Worker
    include Sidekiq::Benchmark::Worker

    sidekiq_options queue: 'default', retry: false, backtrace: true, expires_in: 1.hour

    # Extracts and stores page text for each of the given source URLs.
    #
    # @param source_urls [Array<String>] URLs whose leads should be updated;
    #   nil or a single URL is tolerated and coerced to an array
    # @param user_id [Integer] owner of the lead records to update
    #   (type assumed from the +user_id+ column name -- confirm)
    # @return [void]
    def perform(source_urls = [], user_id)
      urls = Array(source_urls)

      benchmark.bulk_extraction_metric do
        total urls.count

        urls.each_with_index do |src_url, index|
          store_page_text(src_url, user_id)
          # Advance once per URL so that +at+ uses the same unit as +total+.
          at index + 1, ">> #{src_url}"
        end
      end
      benchmark.finish
    end

    private

    # Writes the extracted text of +src_url+ to every lead of +user_id+ that
    # references it. Does nothing when there are no such leads or when no text
    # could be extracted.
    def store_page_text(src_url, user_id)
      records = leads_for(src_url, user_id)
      # Skip the extraction entirely when there is nothing to update.
      return if records.empty?

      # Extract once per URL rather than once per record.
      text = Tika::Resource.new(src_url).text
      return unless text

      records.each { |record| record.update(page_text: text) }
    end

    # Returns the social, phone and email leads (in that order) that belong to
    # +user_id+ and were collected from +src_url+, as one flat array.
    def leads_for(src_url, user_id)
      [SocialLead, PhoneLead, EmailLead].flat_map do |model|
        model.where(user_id: user_id, source_url: src_url).to_a
      end
    end
  end