# web_worker.rb

require 'net/http'
require 'uri'
require 'json'
require 'securerandom'

# Gem-backed helpers used below (Curl, Fletcher, Swot, Gman, Phonelib, Pismo,
# Whois, MetaInspector, SocialShares, etc.) are expected to be loaded by the
# application, e.g. via Bundler.
class WebWorker
  @@validator = ValidationWorker.new
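  # The NLP helpers referenced by analyze_text and extract_entities are not
  # set up in this file; a minimal sketch, assuming the engtagger and
  # whatlanguage gems (their APIs match the calls made in analyze_text).
  # @@ner is assumed to be a named-entity-recognition worker defined elsewhere.
  @@tgr = EngTagger.new           # part-of-speech tagger (assumption)
  @@wl  = WhatLanguage.new(:all)  # language detector (assumption)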
  # Reads a newline-separated list of email addresses from +filepath+ and
  # records each one as an EmailLead under the hushfling.com website.
  def self.validate_email_list(filepath)
    domain = "hushfling.com"
    website = Website.find_or_initialize_by(:domain => domain, :user_id => 1)
    website.save
    counter = 0
    file = File.open(filepath, "rb")
    file.each do |line|
      line.chomp!
      email = EmailLead.find_or_create_by(:address => line, :domain => domain, :user_id => 1)
      if email
        counter += 1
        puts email.id
        puts email.address
      end
    end
    puts counter
    file.close
  end
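  # Generic JSON POST helper. Illustrative usage (the path and payload here
  # are only an example):
  #   WebWorker.post("http://#{ENV['API_HOST']}:8080", "/predict", payload)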
  def self.post(url, path, body = {})
    uri = URI.parse(url)
    http = Net::HTTP.new(uri.host, uri.port)
    request = Net::HTTP::Post.new(path)
    request.add_field('Content-Type', 'application/json')
    request.body = body.to_json
    response = http.request(request)
    response.body
  end
  # Registers the "imageserv" image-classification service (DeepDetect-style
  # API) by shelling out to curl.
  def self.new_image_service
    system("curl -X PUT 'http://#{ENV['API_HOST']}:8080/services/imageserv' -d '{\"mllib\":\"caffe\",\"description\":\"image classification service\",\"type\":\"supervised\",\"parameters\":{\"input\":{\"connector\":\"image\"},\"mllib\":{\"nclasses\":1000}},\"model\":{\"repository\":\"/opt/models/ggnet/\"}}'")
  end
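  # Classifies the image at +url+ with the imageserv service and returns the
  # raw JSON response (top 3 classes). Illustrative call:
  #   WebWorker.predict_image("http://example.com/cat.jpg")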
  def self.predict_image(url)
    body = {"service" => "imageserv", "parameters" => {"input" => {"width" => 224, "height" => 224}, "output" => {"best" => 3}}, "data" => [url]}
    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
    http = Net::HTTP.new(uri.host, uri.port)
    request = Net::HTTP::Post.new("/predict")
    request.add_field('Content-Type', 'application/json')
    request.body = body.to_json
    response = http.request(request)
    response.body
  end
  # Geo-locates +ip+ via the internal /website/locate endpoint (assumes the
  # endpoint accepts the address in its :url parameter) and returns the
  # response body, or false when the request fails.
  def self.predict_location(ip)
    result = Curl.get("http://#{ENV['API_HOST']}/website/locate", :url => ip)
    return false unless result
    response = result.body_str
    puts response
    response
  end
  # Starts an asynchronous training run on the imageserv service; +tags+ is
  # passed through as the training data list. (+url+ is currently unused.)
  def self.train_image(url, tags = [])
    body = {"service" => "imageserv", "async" => true, "parameters" => {"mllib" => {"gpu" => false, "net" => {"batch_size" => 32}, "solver" => {"test_interval" => 500, "iterations" => 30000, "base_lr" => 0.001, "stepsize" => 1000, "gamma" => 0.9}}, "input" => {"connector" => "image", "test_split" => 0.1, "shuffle" => true, "width" => 224, "height" => 224}, "output" => {"measure" => ["acc", "mcll", "f1"]}}, "data" => tags}
    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
    http = Net::HTTP.new(uri.host, uri.port)
    request = Net::HTTP::Post.new("/train")
    request.add_field('Content-Type', 'application/json')
    request.body = body.to_json
    response = http.request(request)
    response.body
  end
  # Sends +url+ to the OCR service on port 9292, using the tesseract worker.
  def self.ocr_image(url)
    uri = URI.parse("http://#{ENV['API_HOST']}:9292")
    http = Net::HTTP.new(uri.host, uri.port)
    request = Net::HTTP::Post.new("/ocr")
    request.add_field('Content-Type', 'application/json')
    request.body = {:img_url => url, :worker => "tesseract"}.to_json
    response = http.request(request)
    response.body
  end
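  # Runs a battery of NLP analyses over +text+ and returns a hash covering
  # word lists, parts of speech, detected language, sentiment/emotion and
  # reading level; returns false when no text is given. Illustrative call:
  #   WebWorker.analyze_text("The quick brown fox jumps over the lazy dog")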
  def self.analyze_text(text)
    if text
      hash = {}
      tagged = @@tgr.add_tags(text)
      hash[:word_list] = @@tgr.get_words(text)
      hash[:nouns] = @@tgr.get_nouns(tagged)
      hash[:proper_nouns] = @@tgr.get_proper_nouns(tagged)
      hash[:past_tense_verbs] = @@tgr.get_past_tense_verbs(tagged)
      hash[:adjectives] = @@tgr.get_adjectives(tagged)
      hash[:noun_phrases] = @@tgr.get_noun_phrases(tagged)
      hash[:language] = @@wl.language(text)
      hash[:languages_ranked] = @@wl.process_text(text)
      hash[:profanity] = SadPanda.polarity(text)
      hash[:emotion] = SadPanda.emotion(text)
      hash[:reading_level] = Odyssey.coleman_liau(text)
      return hash
    else
      return false
    end
  end
  # Enqueues a SpiderWorker crawl job for +url+; returns the queued job id,
  # or false if the job could not be enqueued.
  def self.crawl(url, user_id)
    job_id = SecureRandom.hex(8)
    qid = SpiderWorker.perform_async(url, user_id, job_id)
    if qid
      return qid
    else
      return false
    end
  end
  def self.get_page_rank(url)
    googlerank = GooglePageRank.get(url)
    if googlerank
      return googlerank
    else
      return false
    end
  end
  def self.extract_product(url)
    if url
      hash = {}
      product = Fletcher.fetch url
      hash[:product_name] = product.name # => "Avenir Deluxe Unicycle (20-Inch Wheel)"
      hash[:description] = product.description
      # hash[:image] = product.image.src || nil
      hash[:price] = product.price
      hash
    else
      return false
    end
  end
  def self.extract_entities(text)
    if text
      entities = @@ner.perform(text)
      if entities
        return entities
      else
        return false
      end
    end
  end
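  # Verifies that +email_address+ is deliverable via EmailVerifier and returns
  # its result, or false. Illustrative call:
  #   WebWorker.check_email("someone@example.com")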
  def self.check_email(email_address)
    if email_address
      resp = EmailVerifier.check(email_address)
      if resp
        return resp
      else
        return false
      end
    end
  end
  # Sends a sample prediction request (with an output template and a network
  # block that forwards results to an HTTP endpoint) to the local prediction
  # API via curl.
  def self.create_services
    json = '{
      "service":"imageserv",
      "parameters":{
        "mllib":{
          "gpu":true
        },
        "input":{
          "width":224,
          "height":224
        },
        "output":{
          "best":3,
          "template":"{ {{#body}}{{#predictions}} \"uri\":\"{{uri}}\",\"categories\": [ {{#classes}} { \"category\":\"{{cat}}\",\"score\":{{prob}} } {{^last}},{{/last}}{{/classes}} ] {{/predictions}}{{/body}} }",
          "network":{
            "url":"your-elasticsearch-server.com/images/img",
            "http_method":"POST"
          }
        }
      },
      "data":["http://i.ytimg.com/vi/0vxOhd4qlnA/maxresdefault.jpg"]
    }'
    result = system("curl -XPOST 'http://localhost:8080/predict' -d '#{json}'")
  end
  def self.validate_email(email_address, user_id)
    resp = @@validator.perform(email_address, user_id)
    if resp
      return resp
    else
      return false
    end
  end
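  # Profiles an email address: extracts its domain and flags academic (Swot)
  # and government (Gman) affiliations. Illustrative call:
  #   WebWorker.analyze_email("jdoe@stanford.edu")  # :academia => matched school name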
  def self.analyze_email(email_address)
    if email_address
      hash = {}
      email_domain = email_address.to_s.split("@").last
      school = Swot::school_name email_address
      govt_domain = Gman.new email_address
      hash[:domain] = email_domain
      if school
        hash[:academia] ||= school
      end
      if govt_domain
        hash[:govt_agency] = govt_domain.agency
        # hash[:domain] ||= govt_domain.domain
        hash[:is_govt] = govt_domain.federal?
        hash[:academia] ||= false
      end
      return hash
    else
      return false
    end
  end
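  # Normalises a phone number and derives its area-code region, line type,
  # country and location via the phony_formatted, to_region and Phonelib
  # helpers. Illustrative call: WebWorker.analyze_phone("4155552671")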
  def self.analyze_phone(phone_number)
    if phone_number
      hash = {}
      identifier = Phonelib.parse(phone_number)
      hash[:number] = phone_number.phony_formatted(:normalize => :US, :spaces => '-')
      if phone_number[0].to_s == "1"
        area = phone_number.to_s[1..3]
      else
        area = phone_number.to_s[0..2]
      end
      hash[:region] = Integer(area).to_region(:city => true)
      hash[:type] = identifier.human_type
      hash[:country] = identifier.country
      hash[:location] = identifier.geo_name
      return hash
    else
      return false
    end
  end
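  # Guesses gender from the first name and looks up ethnicity for the last
  # name in the $races table (assumed to be populated elsewhere).
  # Illustrative call: WebWorker.analyze_name("Jane", "Doe")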
  def self.analyze_name(first_name, last_name)
    if first_name and last_name
      hash = {}
      hash[:gender] = Guess.gender(first_name.to_s.humanize)
      hash[:ethnicity] = $races[last_name.to_s.upcase]
      hash[:name] = [first_name, last_name].join(" ")
      return hash
    else
      return false
    end
  end
  # Aggregates page metadata (Pismo), whois data, Google PageRank and
  # MetaInspector output for a domain.
  def self.analyze_domain(domain_name)
    if domain_name
      url = "http://#{domain_name}"
      hash = {}
      doc = Pismo::Document.new(url)
      whois_data = Whois.whois(domain_name)
      googlerank = GooglePageRank.get(url)
      meta = MetaInspector.new(url)
      if doc and doc.title
        hash[:title] = doc.title
        hash[:author] = doc.author
        hash[:meta_keywords] = doc.keywords
        hash[:meta_description] = doc.description
      end
      if whois_data
        hash[:whois] = whois_data
      end
      if googlerank
        hash[:google_links] = googlerank.to_s
      end
      if meta
        hash[:meta] = meta.to_hash.to_s
      end
      return hash
    else
      return false
    end
  end
  def self.social_shares(social_media_url)
    if social_media_url
      result = SocialShares.all social_media_url
      return result
    else
      return false
    end
  end
end