# text_analysis_worker.rb (2.4 KB)

  1. require 'sad_panda'
  2. require 'odyssey'
  3. require 'engtagger'
  4. require 'whatlanguage'
  5. require 'json'
  6. class TextAnalysisWorker
  7. include Sidekiq::Worker
  8. include Sidekiq::Status::Worker
  9. include Sidekiq::Benchmark::Worker
  10. sidekiq_options :queue => 'default', :retry => false, :backtrace => true, expires_in: 1.hour
  11. @@wl = WhatLanguage.new(:all)
  12. @@tgr = EngTagger.new
  13. def perform(record_id, klass)
  14. benchmark.text_analysis_metric do
  15. record = nil
  16. if klass == "EmailLead"
  17. record = EmailLead.find(record_id)
  18. elsif klass == "PhoneLead"
  19. record = PhoneLead.find(record_id)
  20. elsif klass == "SocialLead"
  21. record = SocialLead.find(record_id)
  22. end
  23. if record and record.page_text
  24. hash = {}
  25. tagged = @@tgr.add_tags record.page_text
  26. hash[:word_list] = @@tgr.get_words record.page_text
  27. hash[:nouns] = @@tgr.get_nouns(tagged)
  28. hash[:proper_nouns] = @@tgr.get_proper_nouns(tagged)
  29. hash[:past_tense_verbs] = @@tgr.get_past_tense_verbs(tagged)
  30. hash[:adjectives] = @@tgr.get_adjectives(tagged)
  31. hash[:noun_phrases] = @@tgr.get_noun_phrases(tagged)
  32. hash[:language] = @@wl.language record.page_text
  33. hash[:languages_ranked] = @@wl.process_text record.page_text
  34. hash[:profanity] = SadPanda.polarity record.page_text
  35. hash[:emotion] = SadPanda.emotion record.page_text
  36. hash[:reading_level] = Odyssey.coleman_liau record.page_text
  37. names = text.scan(/([A-Z][a-z]+(?=\s[A-Z])(?:\s[A-Z][a-z]+)+)/)
  38. hash[:names] = names.to_s
  39. if names
  40. begin
  41. names.flatten!
  42. names.uniq!
  43. names.each do |name|
  44. first_name = name.split(" ").first
  45. last_name = name.split(" ").last
  46. gender = Guess.gender(first_name.to_s.humanize)
  47. ethnicity = $races[last_name.to_s.upcase]
  48. if gender or ethnicity
  49. person = Person.find_or_initialize_by(:first_name => first_name.humanize, :last_name => last_name.humanize)
  50. unless person.gender or person.ethnicity
  51. person.gender = gender.to_s
  52. person.ethnicity = ethnicity.to_s
  53. person.save
  54. end
  55. end
  56. end
  57. rescue
  58. #ignore
  59. end
  60. end
  61. record.update(:page_json => hash.to_json)
  62. end
  63. end
  64. benchmark.finish
  65. end
  66. end