kermit 2 years ago
parent
commit
f4cfcf893e
100 changed files with 4907 additions and 0 deletions
  1. 3 0
      engine/.dockerignore
  2. 12 0
      engine/.gitignore
  3. 1 0
      engine/.ruby-version
  4. 7 0
      engine/Capfile
  5. 15 0
      engine/Dockerfile
  6. 16 0
      engine/Dockerfile-base
  7. 8 0
      engine/Dockerfile-prod
  8. 9 0
      engine/Dockerfile-test
  9. 148 0
      engine/Gemfile
  10. 736 0
      engine/Gemfile.lock
  11. 36 0
      engine/Guardfile
  12. 1 0
      engine/Procfile
  13. 6 0
      engine/Rakefile
  14. 2 0
      engine/app/helpers/activities_helper.rb
  15. 2 0
      engine/app/helpers/application_helper.rb
  16. 2 0
      engine/app/helpers/prospects_helper.rb
  17. 8 0
      engine/app/jobs/mailing_list_signup_job.rb
  18. 9 0
      engine/app/mailers/user_mailer.rb
  19. 0 0
      engine/app/models/.keep
  20. 17 0
      engine/app/models/analysis.rb
  21. 39 0
      engine/app/models/bot.rb
  22. 80 0
      engine/app/models/company.rb
  23. 0 0
      engine/app/models/concerns/.keep
  24. 91 0
      engine/app/models/email_lead.rb
  25. 28 0
      engine/app/models/face.rb
  26. 19 0
      engine/app/models/graph.rb
  27. 87 0
      engine/app/models/image_worker.rb
  28. 6 0
      engine/app/models/person.rb
  29. 110 0
      engine/app/models/phone_lead.rb
  30. 86 0
      engine/app/models/recon.rb
  31. 37 0
      engine/app/models/resolver.rb
  32. 23 0
      engine/app/models/score.rb
  33. 143 0
      engine/app/models/social_lead.rb
  34. 68 0
      engine/app/models/text_worker.rb
  35. 242 0
      engine/app/models/user.rb
  36. 313 0
      engine/app/models/web_worker.rb
  37. 52 0
      engine/app/models/website.rb
  38. 313 0
      engine/app/models/worker.rb
  39. 39 0
      engine/app/workers/bulk_extract_worker.rb
  40. 25 0
      engine/app/workers/company_analysis_worker.rb
  41. 16 0
      engine/app/workers/company_bulk_validator_worker.rb
  42. 22 0
      engine/app/workers/dark_worker.rb
  43. 39 0
      engine/app/workers/dns_worker.rb
  44. 38 0
      engine/app/workers/extract_worker.rb
  45. 28 0
      engine/app/workers/geotag_worker.rb
  46. 36 0
      engine/app/workers/instagram_worker.rb
  47. 91 0
      engine/app/workers/lead_worker.rb
  48. 38 0
      engine/app/workers/linkedin_worker.rb
  49. 92 0
      engine/app/workers/namemail_worker.rb
  50. 16 0
      engine/app/workers/ocr_worker.rb
  51. 156 0
      engine/app/workers/okcupid_worker.rb
  52. 31 0
      engine/app/workers/post_spider_worker.rb
  53. 20 0
      engine/app/workers/score_worker.rb
  54. 283 0
      engine/app/workers/spider_worker.rb
  55. 86 0
      engine/app/workers/text_analysis_worker.rb
  56. 36 0
      engine/app/workers/twitter_worker.rb
  57. 117 0
      engine/app/workers/validation_worker.rb
  58. 49 0
      engine/app/workers/whois_worker.rb
  59. 34 0
      engine/app/workers/youtube_worker.rb
  60. 3 0
      engine/bin/bundle
  61. 9 0
      engine/bin/rails
  62. 9 0
      engine/bin/rake
  63. 8 0
      engine/bin/rspec
  64. 29 0
      engine/bin/setup
  65. 15 0
      engine/bin/spring
  66. 26 0
      engine/config.ru
  67. 7 0
      engine/config/application.rb
  68. 3 0
      engine/config/boot.rb
  69. 10 0
      engine/config/containers/Dockerfile-base
  70. 26 0
      engine/config/containers/Dockerfile-nginx
  71. 12 0
      engine/config/containers/Dockerfile-web
  72. 2 0
      engine/config/containers/app_cmd.sh
  73. 43 0
      engine/config/containers/nginx-crm.conf
  74. 44 0
      engine/config/containers/nginx.conf
  75. 61 0
      engine/config/containers/unicorn.rb
  76. 29 0
      engine/config/database.yml
  77. 31 0
      engine/config/deploy.rb
  78. 75 0
      engine/config/deploy/production.rb
  79. 5 0
      engine/config/environment.rb
  80. 21 0
      engine/config/environments/development.rb
  81. 29 0
      engine/config/environments/production.rb
  82. 23 0
      engine/config/environments/test.rb
  83. 1 0
      engine/config/initializers/active_job.rb
  84. 10 0
      engine/config/initializers/analyzers.rb
  85. 2 0
      engine/config/initializers/docker.rb
  86. 6 0
      engine/config/initializers/email_validation.rb
  87. 6 0
      engine/config/initializers/ethnicity.rb
  88. 3 0
      engine/config/initializers/polipus.rb
  89. 2 0
      engine/config/initializers/redis.rb
  90. 7 0
      engine/config/initializers/searchkick.rb
  91. 37 0
      engine/config/initializers/sidekiq.rb
  92. 2 0
      engine/config/initializers/tika.rb
  93. 35 0
      engine/config/initializers/vars.rb
  94. 64 0
      engine/config/locales/devise.en.yml
  95. 31 0
      engine/config/locales/devise_invitable.en.yml
  96. 64 0
      engine/config/locales/en.yml
  97. 6 0
      engine/config/routes.rb
  98. 4 0
      engine/config/schedule.rb
  99. 15 0
      engine/config/secrets.yml
  100. 25 0
      engine/config/sidekiq.yml

+ 3 - 0
engine/.dockerignore

@@ -0,0 +1,3 @@
+.git
+.env
+.dockerignore

+ 12 - 0
engine/.gitignore

@@ -0,0 +1,12 @@
+# Local tooling / editor / OS state
+.vagrant
+.idea
+.bundle
+.DS_Store
+# Runtime artifacts (the bare `tmp` pattern already covers tmp/cache and tmp/pids)
+tmp
+log/
+# Environment-specific configuration that holds credentials.
+# Anchored with a leading "/" (repository root): a pattern starting with "./" never matches anything.
+/config/database.yml
+/config/secrets.yml

+ 1 - 0
engine/.ruby-version

@@ -0,0 +1 @@
+2.2.6

+ 7 - 0
engine/Capfile

@@ -0,0 +1,7 @@
+# Load the Capistrano DSL and set up stages
+require 'capistrano/setup'
+require 'capistrano/deploy'
+require 'capistrano/bundler'
+require 'capistrano/sidekiq'
+require 'capistrano/sidekiq/monit'
+Dir.glob('lib/capistrano/tasks/*.cap').each { |r| import r }

+ 15 - 0
engine/Dockerfile

@@ -0,0 +1,15 @@
+# Ruby version must match the `ruby '2.2.6'` pin in the Gemfile, otherwise `bundle install` aborts.
+FROM ruby:2.2.6
+LABEL maintainer="Peter Alcock <admin@peteralcock.com>"
+# Build toolchain and native-extension headers (duplicate package names removed).
+# The apt lists are deleted in the same layer so they do not end up in the image.
+RUN apt-get update -qq \
+ && apt-get install -qq -y --no-install-recommends \
+      autoconf automake bison build-essential cmake imagemagick \
+      libc6-dev libcurl3 libcurl4-openssl-dev libicu-dev libkrb5-dev \
+      libmagickwand-dev libpq-dev libqt4-dev libqtwebkit-dev libssl-dev \
+      libxml2-dev libxslt1-dev libyaml-dev make nodejs zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/*
+ENV RACK_ENV production
+ENV RAILS_ENV production
+ENV INSTALL_PATH /app
+# WORKDIR creates the directory when it does not exist.
+WORKDIR $INSTALL_PATH
+# Copy only the dependency manifests first so the gem layer stays cached until they change.
+COPY Gemfile Gemfile.lock ./
+RUN bundle install --without test development
+COPY . .
+# Exec form must be a JSON array (double quotes); with single quotes Docker falls back to
+# shell form and tries to run the literal text. The script must be executable in the build context.
+CMD ["config/containers/app_cmd.sh"]

+ 16 - 0
engine/Dockerfile-base

@@ -0,0 +1,16 @@
+# Ruby version must match the `ruby '2.2.6'` pin in the Gemfile, otherwise `bundle install` aborts.
+FROM ruby:2.2.6
+LABEL maintainer="Peter Alcock <admin@peteralcock.com>"
+# Build toolchain and native-extension headers (duplicate package names removed).
+RUN apt-get update -qq \
+ && apt-get install -qq -y --no-install-recommends \
+      autoconf automake bison build-essential cmake imagemagick \
+      libc6-dev libcurl3 libcurl4-openssl-dev libicu-dev libkrb5-dev \
+      libmagickwand-dev libpq-dev libqt4-dev libqtwebkit-dev libssl-dev \
+      libxml2-dev libxslt1-dev libyaml-dev make nodejs zlib1g-dev \
+ && rm -rf /var/lib/apt/lists/*
+ENV RACK_ENV production
+ENV RAILS_ENV production
+ENV INSTALL_PATH /app
+# Install gems from a scratch directory before the application source is copied,
+# so source-only changes do not invalidate the (slow) bundle layer.
+WORKDIR /tmp
+COPY Gemfile Gemfile
+COPY Gemfile.lock Gemfile.lock
+RUN bundle install --without test development
+# WORKDIR creates $INSTALL_PATH when it does not exist.
+WORKDIR $INSTALL_PATH
+COPY . .
+# Exec form must be a JSON array (double quotes); single quotes make Docker fall back to shell form.
+CMD ["config/containers/app_cmd.sh"]
+

+ 8 - 0
engine/Dockerfile-prod

@@ -0,0 +1,8 @@
+# Production image layered on the prebuilt engine base image.
+FROM contactrocket/engine:latest
+ENV RACK_ENV production
+ENV RAILS_ENV production
+ENV INSTALL_PATH /app
+WORKDIR $INSTALL_PATH
+COPY . .
+RUN bundle install --without test development
+# Exec form must be a JSON array (double quotes); single quotes make Docker fall back to shell form.
+CMD ["config/containers/app_cmd.sh"]

+ 9 - 0
engine/Dockerfile-test

@@ -0,0 +1,9 @@
+# Test image: adds a virtual X server, fonts, SQLite and Qt WebKit packages on top of the engine base image.
+FROM contactrocket/engine:latest
+RUN apt-get update -qq && apt-get install -y xvfb sqlite3 libsqlite3-dev libqt5webkit5-dev libqtwebkit-dev xfonts-100dpi xfonts-75dpi xfonts-scalable xfonts-cyrillic x11-apps libqt4-webkit libqt4-dev nodejs
+ENV INSTALL_PATH /app
+ENV RACK_ENV test
+ENV RAILS_ENV test
+WORKDIR $INSTALL_PATH
+COPY . .
+RUN bundle install --without development
+# Exec form must be a JSON array (double quotes); single quotes make Docker fall back to shell form.
+CMD ["config/containers/app_cmd.sh"]

+ 148 - 0
engine/Gemfile

@@ -0,0 +1,148 @@
+source 'https://rubygems.org'
+  ruby '2.2.6'
+  gem 'rails', '5.0.0.1'
+  gem 'pg'
+  gem 'bcrypt-ruby'
+  gem 'iconv'
+  gem 'phonelib'
+  gem 'phony_rails'
+  gem 'redis', '~> 3.2'
+  gem 'hiredis', '~> 0.6.1'
+  gem 'redis-objects'
+  gem 'bson_ext'
+  gem 'curb'
+  gem 'sidekiq', '< 5'
+  #gem 'sidekiq-pro', '< 4', :source => 'https://username:password@gems.contribsys.com/'
+  gem 'sidekiq-benchmark'
+  gem 'sidekiq-status'
+
+  gem "searchjoy"
+  gem 'eventmachine'
+  gem 'em-udns'
+  gem 'nokogiri'
+  gem 'faraday_middleware-aws-signers-v4'
+  gem 'elasticsearch', '>= 1.0.15'
+  gem 'searchkick'
+  gem 'statsd-ruby'
+  gem 'Indirizzo'
+  gem 'searchjoy', :require => false
+  gem 'polipus', '~> 0.5.1'
+  gem 'polipus-elasticsearch'
+  # gem 'polipus-storage-s3'
+  gem 'fletcher'
+  gem 'PageRankr'
+  gem 'googlepagerank'
+  gem 'social_shares'
+  gem 'linkedin-scraper'
+  gem 'docker-api'
+  gem 'net-telnet'
+  gem 'dnsruby'
+  gem 'metainspector'
+  gem 'whois'
+  gem 'pismo'
+  gem 'mechanize'
+  gem 'whatlanguage'
+  gem 'ruby-readability'
+  gem 'words_counted'
+  gem 'word_count_analyzer'
+  gem 'text'
+  gem 'namae'
+  gem 'fuzzy_match'
+  gem 'fuzzy-string-match'
+  gem 'engtagger'
+  gem 'guess'
+  gem 'sad_panda'
+  gem 'odyssey'
+  gem 'stopwords-filter'
+  gem 'validates_phone_number'
+  gem 'mx-validator'
+  gem 'ValidateEmail'
+  gem 'email-authentication'
+  gem 'ruby-mailchecker'
+  gem 'email_domain_validator'
+  gem 'email_verifier'
+  gem 'twitter'
+  gem 'swot'
+  gem 'gman'
+  gem 'area'
+  gem 'sentimental'
+  gem 'reality'
+  gem 'eventmachine'
+  gem 'rubydns'
+  gem 'rb-readline'
+#  gem 'lonely_coder'
+  gem 'roo'
+  gem 'tika-app'
+
+gem 'sidekiq-history'
+gem 'sidekiq-hierarchy'
+gem 'sidekiq-lock'
+gem 'sidekiq-statsd'
+# gem 'sidekiq-throttler'
+gem 'clarifier'
+
+
+group :development, :test do
+# gem 'predictionio', '0.9.6'
+   gem 'ffi'
+   gem 'letter_opener'
+   gem 'letter_opener_web'
+#  gem 'email_validator', :require => 'email_validator/strict'
+
+   # gem 'sidekiq-monitor-stats'
+ #  gem 'sidekiq-benchmark'
+  # gem 'sidekiq-statistic'
+ #  gem 'sidetiq'
+  # gem 'sidekiq-failures'
+ # # gem 'sidekiq-throttler'
+ #  gem 'sidekiq-grouping'
+ #  gem 'sidekiq-limit_fetch'
+   # gem 'sidekiq_mailer'
+ #  gem 'sidekiq-unique-jobs'
+  # gem 'sidekiq_status'
+ #
+
+# << Alternative
+  # gem 'sidekiq_monitor' # rails g sidekiq:monitor:install && rake db:migrate
+# gem 'activerecord4-redshift-adapter', '~> 0.2.0'
+# gem 'mongo'
+# gem 'apartment'
+ # gem 'apartment-sidekiq'
+
+  gem 'capistrano'
+  gem 'capistrano-rvm'
+  gem 'capistrano-bundler'
+  gem 'capistrano-rails'
+  gem 'capistrano-sidekiq' # , github: 'seuros/capistrano-sidekiq'
+  gem 'capistrano-rails-console'
+  gem 'capistrano-composer'
+  gem 'ffaker'
+  gem 'rspec-core'
+  gem 'rspec-rails'
+  gem 'benchmark-ips'
+  gem 'derailed'
+  gem 'stackprof'
+  gem 'method_profiler'
+  gem 'database_cleaner'
+  gem 'guard' , :require => false
+  gem 'rb-fchange', :require => false
+  gem 'rb-fsevent', :require => false
+  gem 'rb-inotify', :require => false
+  gem 'guard-bundler'
+  gem 'guard-rspec'
+  gem 'guard-rails'
+  gem 'guard-sidekiq'
+  gem 'fuubar'
+  gem 'growl'
+  gem 'serverspec'
+  gem 'classifier-reborn'
+  gem 'nlp-pure'
+  gem 'punkt-segmenter'
+  gem 'textoken'
+  gem 'pragmatic_segmenter'
+  gem 'tactful_tokenizer'
+  gem "scalpel"
+  gem 'pragmatic_tokenizer' # THIS IS AWESOME
+
+ end
+

+ 736 - 0
engine/Gemfile.lock

@@ -0,0 +1,736 @@
+GEM
+  remote: https://rubygems.org/
+  remote: https://username:password@gems.contribsys.com/
+  specs:
+    Indirizzo (0.1.7)
+    PageRankr (4.4.0)
+      httparty (>= 0.9.0)
+      json (>= 1.4.6)
+      jsonpath (>= 0.4.2)
+      nokogiri (>= 1.4.1)
+      public_suffix (>= 1.4.4)
+    RubyInline (3.12.4)
+      ZenTest (~> 4.3)
+    ValidateEmail (1.0.1)
+    ZenTest (4.11.1)
+    actioncable (5.0.0.1)
+      actionpack (= 5.0.0.1)
+      nio4r (~> 1.2)
+      websocket-driver (~> 0.6.1)
+    actionmailer (5.0.0.1)
+      actionpack (= 5.0.0.1)
+      actionview (= 5.0.0.1)
+      activejob (= 5.0.0.1)
+      mail (~> 2.5, >= 2.5.4)
+      rails-dom-testing (~> 2.0)
+    actionpack (5.0.0.1)
+      actionview (= 5.0.0.1)
+      activesupport (= 5.0.0.1)
+      rack (~> 2.0)
+      rack-test (~> 0.6.3)
+      rails-dom-testing (~> 2.0)
+      rails-html-sanitizer (~> 1.0, >= 1.0.2)
+    actionview (5.0.0.1)
+      activesupport (= 5.0.0.1)
+      builder (~> 3.1)
+      erubis (~> 2.7.0)
+      rails-dom-testing (~> 2.0)
+      rails-html-sanitizer (~> 1.0, >= 1.0.2)
+    activejob (5.0.0.1)
+      activesupport (= 5.0.0.1)
+      globalid (>= 0.3.6)
+    activemodel (5.0.0.1)
+      activesupport (= 5.0.0.1)
+    activerecord (5.0.0.1)
+      activemodel (= 5.0.0.1)
+      activesupport (= 5.0.0.1)
+      arel (~> 7.0)
+    activesupport (5.0.0.1)
+      concurrent-ruby (~> 1.0, >= 1.0.2)
+      i18n (~> 0.7)
+      minitest (~> 5.1)
+      tzinfo (~> 1.1)
+    addressable (2.5.0)
+      public_suffix (~> 2.0, >= 2.0.2)
+    airbrussh (1.1.1)
+      sshkit (>= 1.6.1, != 1.7.0)
+    area (0.10.0)
+      fastercsv (~> 1.5)
+    arel (7.1.4)
+    awesome_print (1.7.0)
+    aws-sdk (2.6.38)
+      aws-sdk-resources (= 2.6.38)
+    aws-sdk-core (2.6.38)
+      aws-sigv4 (~> 1.0)
+      jmespath (~> 1.0)
+    aws-sdk-resources (2.6.38)
+      aws-sdk-core (= 2.6.38)
+    aws-sigv4 (1.0.0)
+    backports (3.6.8)
+    bcrypt (3.1.11)
+    bcrypt-ruby (3.1.5)
+      bcrypt (>= 3.1.3)
+    benchmark-ips (2.7.2)
+    bson (1.12.5)
+    bson_ext (1.12.5)
+      bson (~> 1.12.5)
+    buftok (0.2.0)
+    builder (3.2.2)
+    capistrano (3.7.1)
+      airbrussh (>= 1.0.0)
+      capistrano-harrow
+      i18n
+      rake (>= 10.0.0)
+      sshkit (>= 1.9.0)
+    capistrano-bundler (1.2.0)
+      capistrano (~> 3.1)
+      sshkit (~> 1.2)
+    capistrano-composer (0.0.6)
+      capistrano (>= 3.0.0.pre)
+    capistrano-harrow (0.5.3)
+    capistrano-rails (1.2.0)
+      capistrano (~> 3.1)
+      capistrano-bundler (~> 1.1)
+    capistrano-rails-console (2.2.0)
+      capistrano (>= 3.5.0, < 4.0.0)
+      sshkit-interactive (~> 0.2.0)
+    capistrano-rvm (0.1.2)
+      capistrano (~> 3.0)
+      sshkit (~> 1.2)
+    capistrano-sidekiq (0.10.0)
+      capistrano
+      sidekiq (>= 3.4)
+    celluloid (0.16.0)
+      timers (~> 4.0.0)
+    celluloid-io (0.16.2)
+      celluloid (>= 0.16.0)
+      nio4r (>= 1.1.0)
+    chartkick (2.2.1)
+    chronic (0.10.2)
+    clarifier (0.9.1)
+    classifier-reborn (2.1.0)
+      fast-stemmer (~> 1.0)
+    coderay (1.1.1)
+    colored (1.2)
+    concurrent-ruby (1.0.5)
+    connection_pool (2.2.1)
+    crass (1.0.2)
+    curb (0.9.3)
+    database_cleaner (1.5.3)
+    derailed (0.1.0)
+      derailed_benchmarks
+    derailed_benchmarks (1.3.1)
+      benchmark-ips (~> 2)
+      get_process_mem (~> 0)
+      heapy (~> 0)
+      memory_profiler (~> 0)
+      rack (>= 1)
+      rake (> 10, < 12)
+      thor (~> 0.19)
+    diff-lcs (1.2.5)
+    dnsruby (1.60.0)
+    docker-api (1.33.0)
+      excon (>= 0.38.0)
+      json
+    domain_name (0.5.20161129)
+      unf (>= 0.0.5, < 1.0.0)
+    elasticsearch (1.0.18)
+      elasticsearch-api (= 1.0.18)
+      elasticsearch-transport (= 1.0.18)
+    elasticsearch-api (1.0.18)
+      multi_json
+    elasticsearch-model (0.1.9)
+      activesupport (> 3)
+      elasticsearch (> 0.4)
+      hashie
+    elasticsearch-transport (1.0.18)
+      faraday
+      multi_json
+    em-udns (0.3.6)
+    email-authentication (0.2.5)
+      dnsruby
+      thor
+    email_domain_validator (0.0.1)
+    email_verifier (0.1.0)
+      dnsruby (>= 1.5)
+    engtagger (0.2.1)
+    equalizer (0.0.11)
+    erubis (2.7.0)
+    eventmachine (1.2.1)
+    excon (0.54.0)
+    faraday (0.10.0)
+      multipart-post (>= 1.2, < 3)
+    faraday-cookie_jar (0.0.6)
+      faraday (>= 0.7.4)
+      http-cookie (~> 1.0.0)
+    faraday-encoding (0.0.4)
+      faraday
+    faraday-http-cache (1.3.1)
+      faraday (~> 0.8)
+    faraday_middleware (0.10.1)
+      faraday (>= 0.7.4, < 1.0)
+    faraday_middleware-aws-signers-v4 (0.1.5)
+      aws-sdk (~> 2.1)
+      faraday (~> 0.9)
+    fast-stemmer (1.0.2)
+    fastercsv (1.5.5)
+    fastimage (2.0.1)
+      addressable (~> 2)
+    ffaker (2.3.0)
+    ffi (1.9.14)
+    fletcher (0.6.9)
+      hashie
+      monetize
+      money
+      nokogiri
+      thor
+    formatador (0.2.5)
+    fuubar (2.2.0)
+      rspec-core (~> 3.0)
+      ruby-progressbar (~> 1.4)
+    fuzzy-string-match (0.9.7)
+      RubyInline (>= 3.8.6)
+    fuzzy_match (2.1.0)
+    geokit (1.10.0)
+    get_process_mem (0.2.1)
+    globalid (0.3.7)
+      activesupport (>= 4.1.0)
+    gman (7.0.1)
+      colored (~> 1.2)
+      iso_country_codes (~> 0.6)
+      naughty_or_nice (~> 2.0)
+    googlepagerank (1.0.4)
+      activesupport (>= 1.4.2)
+      hoe (>= 1.5.3)
+      hpricot (>= 0.4.86)
+    groupdate (3.1.1)
+      activesupport (>= 3)
+    growl (1.0.3)
+    guard (2.14.0)
+      formatador (>= 0.2.4)
+      listen (>= 2.7, < 4.0)
+      lumberjack (~> 1.0)
+      nenv (~> 0.1)
+      notiffany (~> 0.0)
+      pry (>= 0.9.12)
+      shellany (~> 0.0)
+      thor (>= 0.18.1)
+    guard-bundler (2.1.0)
+      bundler (~> 1.0)
+      guard (~> 2.2)
+      guard-compat (~> 1.1)
+    guard-compat (1.2.1)
+    guard-rails (0.8.0)
+      guard (~> 2.11)
+      guard-compat (~> 1.0)
+    guard-rspec (4.7.3)
+      guard (~> 2.1)
+      guard-compat (~> 1.1)
+      rspec (>= 2.99.0, < 4.0)
+    guard-sidekiq (0.1.0)
+      guard (>= 2)
+      sidekiq
+    guess (0.1.0)
+    guess_html_encoding (0.0.11)
+    hashie (3.4.6)
+    heapy (0.1.2)
+    hirb (0.7.3)
+    hiredis (0.6.1)
+    hitimes (1.2.4)
+    hoe (3.15.3)
+      rake (>= 0.8, < 12.0)
+    hpricot (0.8.6)
+    htmlentities (4.3.4)
+    http (2.1.0)
+      addressable (~> 2.3)
+      http-cookie (~> 1.0)
+      http-form_data (~> 1.0.1)
+      http_parser.rb (~> 0.6.0)
+    http-cookie (1.0.3)
+      domain_name (~> 0.5)
+    http-form_data (1.0.1)
+    http_parser.rb (0.6.0)
+    httparty (0.14.0)
+      multi_xml (>= 0.5.2)
+    i18n (0.7.0)
+    iconv (1.0.4)
+    infoboxer (0.2.7)
+      addressable
+      backports
+      htmlentities
+      mediawiktory (>= 0.0.2)
+      procme
+      terminal-table
+    iso_country_codes (0.7.5)
+    jmespath (1.3.1)
+    json (1.8.3)
+    jsonpath (0.5.8)
+      multi_json
+    launchy (2.4.3)
+      addressable (~> 2.3)
+    letter_opener (1.4.1)
+      launchy (~> 2.2)
+    letter_opener_web (1.3.0)
+      actionmailer (>= 3.2)
+      letter_opener (~> 1.0)
+      railties (>= 3.2)
+    linkedin-scraper (2.1.1)
+      mechanize (~> 2)
+      random_user_agent
+    listen (3.1.5)
+      rb-fsevent (~> 0.9, >= 0.9.4)
+      rb-inotify (~> 0.9, >= 0.9.7)
+      ruby_dep (~> 1.2)
+    loofah (2.0.3)
+      nokogiri (>= 1.5.9)
+    lumberjack (1.0.10)
+    mail (2.6.4)
+      mime-types (>= 1.16, < 4)
+    mechanize (2.7.5)
+      domain_name (~> 0.5, >= 0.5.1)
+      http-cookie (~> 1.0)
+      mime-types (>= 1.17.2)
+      net-http-digest_auth (~> 1.1, >= 1.1.1)
+      net-http-persistent (~> 2.5, >= 2.5.2)
+      nokogiri (~> 1.6)
+      ntlm-http (~> 0.1, >= 0.1.1)
+      webrobots (>= 0.0.9, < 0.2)
+    mediawiktory (0.0.2)
+      addressable
+      faraday
+      faraday_middleware
+      hashie
+    memoizable (0.4.2)
+      thread_safe (~> 0.3, >= 0.3.1)
+    memory_profiler (0.9.7)
+    metainspector (5.3.1)
+      addressable (~> 2.4)
+      faraday (~> 0.9)
+      faraday-cookie_jar (~> 0.0)
+      faraday-encoding (~> 0.0.3)
+      faraday-http-cache (~> 1.2)
+      faraday_middleware (~> 0.10)
+      fastimage (~> 2.0)
+      nesty (~> 1.0)
+      nokogiri (~> 1.6)
+    method_profiler (2.0.1)
+      hirb (>= 0.6.0)
+    method_source (0.8.2)
+    mime-types (2.99.3)
+    mini_portile2 (2.1.0)
+    minitest (5.10.1)
+    monetize (1.4.0)
+      money (~> 6.7)
+    money (6.7.1)
+      i18n (>= 0.6.4, <= 0.7.0)
+      sixarm_ruby_unaccent (>= 1.1.1, < 2)
+    multi_json (1.12.1)
+    multi_xml (0.6.0)
+    multipart-post (2.0.0)
+    mx-validator (0.0.1)
+      dnsruby (>= 1.5.4)
+    namae (0.11.3)
+    naught (1.1.0)
+    naughty_or_nice (2.1.0)
+      addressable (~> 2.3)
+      public_suffix (~> 2.0)
+    nenv (0.3.0)
+    nesty (1.0.2)
+    net-http-digest_auth (1.4)
+    net-http-persistent (2.9.4)
+    net-scp (1.2.1)
+      net-ssh (>= 2.6.5)
+    net-ssh (3.2.0)
+    net-telnet (0.1.1)
+    netrc (0.11.0)
+    nio4r (1.2.1)
+    nlp-pure (0.1.0)
+    nokogiri (1.6.8.1)
+      mini_portile2 (~> 2.1.0)
+    nokogumbo (1.4.10)
+      nokogiri
+    notiffany (0.1.1)
+      nenv (~> 0.1)
+      shellany (~> 0.0)
+    ntlm-http (0.1.1)
+    odyssey (0.2.0)
+      require_all
+    open-weather (0.12.0)
+      json (~> 1)
+    pg (0.19.0)
+    phonelib (0.6.8)
+    phony (2.15.38)
+    phony_rails (0.14.4)
+      activesupport (>= 3.0)
+      phony (~> 2.15)
+    pismo (0.7.4)
+      awesome_print
+      chronic
+      fast-stemmer
+      nokogiri
+      sanitize
+    polipus (0.5.1)
+      hiredis (~> 0.5, >= 0.4.5)
+      http-cookie (~> 1.0, >= 1.0.1)
+      nokogiri (~> 1.6, >= 1.6.0)
+      redis (~> 3.0, >= 3.0.4)
+      redis-bloomfilter (~> 0.0, >= 0.0.3)
+      redis-queue (~> 0.0, >= 0.0.4)
+    polipus-elasticsearch (0.0.4)
+      elasticsearch (~> 1.0.4)
+      elasticsearch-model (~> 0.1.4)
+      polipus (~> 0.3, >= 0.3.0)
+    pragmatic_segmenter (0.3.13)
+      unicode
+    pragmatic_tokenizer (3.0.4)
+      unicode
+    procme (0.0.3)
+    pry (0.10.4)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    public_suffix (2.0.4)
+    punkt-segmenter (0.9.1)
+      unicode_utils (>= 1.0.0)
+    quandl (1.0.3)
+      activesupport (>= 4.2.3)
+      json (~> 1.8.3)
+      rest-client (~> 1.8.0)
+    rack (2.0.1)
+    rack-protection (1.5.3)
+      rack
+    rack-test (0.6.3)
+      rack (>= 1.0)
+    rails (5.0.0.1)
+      actioncable (= 5.0.0.1)
+      actionmailer (= 5.0.0.1)
+      actionpack (= 5.0.0.1)
+      actionview (= 5.0.0.1)
+      activejob (= 5.0.0.1)
+      activemodel (= 5.0.0.1)
+      activerecord (= 5.0.0.1)
+      activesupport (= 5.0.0.1)
+      bundler (>= 1.3.0, < 2.0)
+      railties (= 5.0.0.1)
+      sprockets-rails (>= 2.0.0)
+    rails-dom-testing (2.0.1)
+      activesupport (>= 4.2.0, < 6.0)
+      nokogiri (~> 1.6.0)
+    rails-html-sanitizer (1.0.3)
+      loofah (~> 2.0)
+    railties (5.0.0.1)
+      actionpack (= 5.0.0.1)
+      activesupport (= 5.0.0.1)
+      method_source
+      rake (>= 0.8.7)
+      thor (>= 0.18.1, < 2.0)
+    rake (11.3.0)
+    random_user_agent (1.0.0)
+    rb-fchange (0.0.6)
+      ffi
+    rb-fsevent (0.9.8)
+    rb-inotify (0.9.7)
+      ffi (>= 0.5.0)
+    rb-readline (0.5.3)
+    reality (0.0.4)
+      geokit
+      hashie
+      infoboxer (>= 0.2.4)
+      open-weather
+      quandl
+      ruby-sun-times
+      time_boots
+      timezone (>= 0.99.0)
+      tzinfo
+    redis (3.3.3)
+    redis-bloomfilter (0.0.3)
+      hiredis (~> 0.5, >= 0.5.2)
+      redis (~> 3.0, >= 3.0.4)
+    redis-objects (1.2.1)
+      redis (>= 3.0.2)
+    redis-queue (0.0.4)
+      hiredis (~> 0.5, >= 0.5.2)
+      redis (~> 3.0, >= 3.0.4)
+    require_all (1.3.3)
+    rest-client (1.8.0)
+      http-cookie (>= 1.0.2, < 2.0)
+      mime-types (>= 1.16, < 3.0)
+      netrc (~> 0.7)
+    roo (2.5.1)
+      nokogiri (~> 1)
+      rubyzip (~> 1.1, < 2.0.0)
+    rspec (3.5.0)
+      rspec-core (~> 3.5.0)
+      rspec-expectations (~> 3.5.0)
+      rspec-mocks (~> 3.5.0)
+    rspec-core (3.5.4)
+      rspec-support (~> 3.5.0)
+    rspec-expectations (3.5.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.5.0)
+    rspec-its (1.2.0)
+      rspec-core (>= 3.0.0)
+      rspec-expectations (>= 3.0.0)
+    rspec-mocks (3.5.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.5.0)
+    rspec-rails (3.5.2)
+      actionpack (>= 3.0)
+      activesupport (>= 3.0)
+      railties (>= 3.0)
+      rspec-core (~> 3.5.0)
+      rspec-expectations (~> 3.5.0)
+      rspec-mocks (~> 3.5.0)
+      rspec-support (~> 3.5.0)
+    rspec-support (3.5.0)
+    ruby-mailchecker (3.0.19)
+    ruby-progressbar (1.8.1)
+    ruby-readability (0.7.0)
+      guess_html_encoding (>= 0.0.4)
+      nokogiri (>= 1.6.0)
+    ruby-stemmer (0.9.6)
+    ruby-sun-times (0.1.5)
+    ruby_dep (1.5.0)
+    rubydns (1.0.3)
+      celluloid (= 0.16.0)
+      celluloid-io (= 0.16.2)
+      timers (~> 4.0.1)
+    rubyzip (1.2.0)
+    sad_panda (1.0.1)
+      ruby-stemmer
+    sanitize (4.4.0)
+      crass (~> 1.0.2)
+      nokogiri (>= 1.4.4)
+      nokogumbo (~> 1.4.1)
+    scalpel (0.2.1)
+    searchjoy (0.1.0)
+      activerecord
+      chartkick
+      groupdate
+    searchkick (1.4.1)
+      activemodel
+      elasticsearch (>= 1)
+      hashie
+    sentimental (1.4.0)
+      json (~> 1.8, >= 1.8.3)
+    serverspec (2.37.2)
+      multi_json
+      rspec (~> 3.0)
+      rspec-its
+      specinfra (~> 2.53)
+    sfl (2.3)
+    shellany (0.0.1)
+    sidekiq (4.2.10)
+      concurrent-ruby (~> 1.0)
+      connection_pool (~> 2.2, >= 2.2.0)
+      rack-protection (>= 1.5.0)
+      redis (~> 3.2, >= 3.2.1)
+    sidekiq-benchmark (0.5.2)
+      chartkick (>= 1.1.1)
+      sidekiq (~> 4)
+    sidekiq-hierarchy (2.1.0)
+      connection_pool (~> 2.0)
+      sidekiq (> 3.3, < 5)
+    sidekiq-history (0.0.7)
+      sidekiq (>= 3.0.0)
+    sidekiq-lock (0.3.0)
+      redis (>= 3.0.5)
+      sidekiq (>= 2.14.0)
+    sidekiq-pro (3.4.5)
+      sidekiq (>= 4.1.5)
+    sidekiq-statsd (0.1.5)
+      activesupport
+      sidekiq (>= 2.6)
+      statsd-ruby (>= 1.1.0)
+    sidekiq-status (0.6.0)
+      sidekiq (>= 2.7)
+    simple_oauth (0.3.1)
+    sixarm_ruby_unaccent (1.1.1)
+    slop (3.6.0)
+    social_shares (0.3.2)
+      json
+      rest-client
+    specinfra (2.66.3)
+      net-scp
+      net-ssh (>= 2.7, < 4.0)
+      net-telnet
+      sfl
+    sprockets (3.7.0)
+      concurrent-ruby (~> 1.0)
+      rack (> 1, < 3)
+    sprockets-rails (3.2.0)
+      actionpack (>= 4.0)
+      activesupport (>= 4.0)
+      sprockets (>= 3.0.0)
+    sshkit (1.11.5)
+      net-scp (>= 1.1.2)
+      net-ssh (>= 2.8.0)
+    sshkit-interactive (0.2.0)
+      sshkit (~> 1.9)
+    stackprof (0.2.10)
+    statsd-ruby (1.3.0)
+    stopwords-filter (0.3.1)
+    swot (1.0.0)
+      naughty_or_nice (~> 2.0)
+      public_suffix
+    tactful_tokenizer (0.0.5)
+    terminal-table (1.7.3)
+      unicode-display_width (~> 1.1.1)
+    text (1.3.1)
+    textoken (1.1.2)
+    thor (0.19.4)
+    thread_safe (0.3.5)
+    tika-app (0.2.1)
+    time_boots (0.0.2)
+    timers (4.0.4)
+      hitimes
+    timezone (1.2.4)
+    twitter (6.0.0)
+      addressable (~> 2.5)
+      buftok (~> 0.2.0)
+      equalizer (= 0.0.11)
+      faraday (~> 0.10.0)
+      http (~> 2.1)
+      http_parser.rb (~> 0.6.0)
+      memoizable (~> 0.4.2)
+      naught (~> 1.1)
+      simple_oauth (~> 0.3.1)
+    tzinfo (1.2.2)
+      thread_safe (~> 0.1)
+    unf (0.1.4)
+      unf_ext
+    unf_ext (0.0.7.2)
+    unicode (0.4.4.2)
+    unicode-display_width (1.1.2)
+    unicode_utils (1.4.0)
+    validates_phone_number (2.0.1)
+    webrobots (0.1.2)
+    websocket-driver (0.6.4)
+      websocket-extensions (>= 0.1.0)
+    websocket-extensions (0.1.2)
+    whatlanguage (1.0.6)
+    whois (4.0.1)
+    word_count_analyzer (1.0.0)
+      engtagger
+    words_counted (1.0.2)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  Indirizzo
+  PageRankr
+  ValidateEmail
+  area
+  bcrypt-ruby
+  benchmark-ips
+  bson_ext
+  capistrano
+  capistrano-bundler
+  capistrano-composer
+  capistrano-rails
+  capistrano-rails-console
+  capistrano-rvm
+  capistrano-sidekiq
+  clarifier
+  classifier-reborn
+  curb
+  database_cleaner
+  derailed
+  dnsruby
+  docker-api
+  elasticsearch (>= 1.0.15)
+  em-udns
+  email-authentication
+  email_domain_validator
+  email_verifier
+  engtagger
+  eventmachine
+  faraday_middleware-aws-signers-v4
+  ffaker
+  ffi
+  fletcher
+  fuubar
+  fuzzy-string-match
+  fuzzy_match
+  gman
+  googlepagerank
+  growl
+  guard
+  guard-bundler
+  guard-rails
+  guard-rspec
+  guard-sidekiq
+  guess
+  hiredis (~> 0.6.1)
+  iconv
+  letter_opener
+  letter_opener_web
+  linkedin-scraper
+  mechanize
+  metainspector
+  method_profiler
+  mx-validator
+  namae
+  net-telnet
+  nlp-pure
+  nokogiri
+  odyssey
+  pg
+  phonelib
+  phony_rails
+  pismo
+  polipus (~> 0.5.1)
+  polipus-elasticsearch
+  pragmatic_segmenter
+  pragmatic_tokenizer
+  punkt-segmenter
+  rails (= 5.0.0.1)
+  rb-fchange
+  rb-fsevent
+  rb-inotify
+  rb-readline
+  reality
+  redis (~> 3.2)
+  redis-objects
+  roo
+  rspec-core
+  rspec-rails
+  ruby-mailchecker
+  ruby-readability
+  rubydns
+  sad_panda
+  scalpel
+  searchjoy
+  searchkick
+  sentimental
+  serverspec
+  sidekiq (< 5)
+  sidekiq-benchmark
+  sidekiq-hierarchy
+  sidekiq-history
+  sidekiq-lock
+  sidekiq-pro (< 4)!
+  sidekiq-statsd
+  sidekiq-status
+  social_shares
+  stackprof
+  statsd-ruby
+  stopwords-filter
+  swot
+  tactful_tokenizer
+  text
+  textoken
+  tika-app
+  twitter
+  validates_phone_number
+  whatlanguage
+  whois
+  word_count_analyzer
+  words_counted
+
+RUBY VERSION
+   ruby 2.2.6p396
+
+BUNDLED WITH
+   1.14.6

+ 36 - 0
engine/Guardfile

@@ -0,0 +1,36 @@
+
+
+guard :bundler do
+  require 'guard/bundler'
+  require 'guard/bundler/verify'
+  helper = Guard::Bundler::Verify.new
+  files = ['Gemfile']
+  files += Dir['*.gemspec'] if files.any? { |f| helper.uses_gemspec?(f) }
+  files.each { |file| watch(helper.real_path(file)) }
+end
+
+
+guard 'sidekiq', :cli => '-C config/sidekiq.yml -L log/sidekiq.log -P tmp/pids/sidekiq.pid -e development' do
+  watch(%r{^workers/(.+)\.rb$})
+  watch(%r{^models/(.+)\.rb$})
+end
+
+
+guard :rspec, cmd: "bundle exec rspec"  do
+  require "guard/rspec/dsl"
+  dsl = Guard::RSpec::Dsl.new(self)
+
+  # RSpec files
+  rspec = dsl.rspec
+  watch(rspec.spec_support) { rspec.spec_dir }
+  watch(rspec.spec_files)
+
+  # Ruby files
+  ruby = dsl.ruby
+  dsl.watch_spec_files_for(ruby.lib_files)
+
+end
+
+
+
+

+ 1 - 0
engine/Procfile

@@ -0,0 +1 @@
+worker: bundle exec sidekiq -L log/sidekiq.log -P tmp/pids/sidekiq.pid -e $RAILS_ENV

+ 6 - 0
engine/Rakefile

@@ -0,0 +1,6 @@
+# Add your own tasks in files placed in lib/tasks ending in .rake,
+# for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
+
+require File.expand_path('../config/application', __FILE__)
+
+Rails.application.load_tasks

+ 2 - 0
engine/app/helpers/activities_helper.rb

@@ -0,0 +1,2 @@
+module ActivitiesHelper
+end

+ 2 - 0
engine/app/helpers/application_helper.rb

@@ -0,0 +1,2 @@
+module ApplicationHelper
+end

+ 2 - 0
engine/app/helpers/prospects_helper.rb

@@ -0,0 +1,2 @@
+module ProspectsHelper
+end

+ 8 - 0
engine/app/jobs/mailing_list_signup_job.rb

@@ -0,0 +1,8 @@
+class MailingListSignupJob < ActiveJob::Base
+
+  def perform(user)
+    logger.info "signing up #{user.email}"
+    user.subscribe
+  end
+
+end

+ 9 - 0
engine/app/mailers/user_mailer.rb

@@ -0,0 +1,9 @@
+class UserMailer < ActionMailer::Base
+  default :from => "no-reply@your-server.net"
+
+  def marketing(email)
+    mail(:to => email, :subject => "Someone's looking for you on ContactRocket!")
+  end
+
+
+end

+ 0 - 0
engine/app/models/.keep


+ 17 - 0
engine/app/models/analysis.rb

@@ -0,0 +1,17 @@
+# Placeholder for image-analysis operations. Every method below has an empty
+# body and therefore returns nil; nothing is implemented yet.
+class Analysis
+
+  # Stub: takes two image URLs; currently does nothing and returns nil.
+  def image_similarity(url_1, url_2)
+
+  end
+
+  # Stub: takes one image URL; currently does nothing and returns nil.
+  def image_sentiment(image_url)
+
+  end
+
+  # Stub: takes one image URL; currently does nothing and returns nil.
+  def image_tags(url)
+
+  end
+
+
+
+end

+ 39 - 0
engine/app/models/bot.rb

@@ -0,0 +1,39 @@
+require 'curb'
+require 'securerandom'
+class Bot
+
+  def initialize(user_id, greeting)
+
+    @guest = User.find(user_id)
+    @convo_id = SecureRandom.hex(12)
+    @message = greeting
+    resp = Curb.get(ENV['CHATBOT_URL'])
+    @last_reply = resp.body
+
+  end
+
+  def conversation_id
+    @convo_id
+  end
+
+  def latest
+    @message
+  end
+
+  def train(call,response)
+
+  end
+
+  def add_aiml(aiml)
+
+  end
+
+  def del_aiml(aiml)
+
+  end
+
+  def load_aiml(aiml)
+
+  end
+
+end

+ 80 - 0
engine/app/models/company.rb

@@ -0,0 +1,80 @@
+require 'uri'
+require 'json'
+require 'rubydns'
+require 'rubydns/system'
+class Company < ActiveRecord::Base
+  self.table_name = "companies"
+  has_many :email_leads, :foreign_key => :domain, :primary_key => :domain
+  has_many :phone_leads, :foreign_key => :domain, :primary_key => :domain
+  has_many :social_leads, :foreign_key => :domain, :primary_key => :domain
+  has_many :people, :foreign_key => :domain, :primary_key => :domain
+  scope :with_image, lambda { where.not(:image_url => nil) }
+  scope :by_state, lambda {|state| where(:state => state) }
+  scope :by_country, lambda {|state| where(:state => state) }
+  scope :by_domain, lambda {|domain| where(:domain => domain) }
+  scope :by_website, lambda {|website| where(:website => website) }
+  scope :by_employees, lambda {|employees| where(:employees_exact > employees) }
+  scope :by_revenue, lambda {|revenue| where(:revenue_exact > revenue) }
+  scope :by_industry, lambda {|industry| where(:industry => industry) }
+  scope :updated_recently, lambda {where(:updated_at => [24.hours.ago..Time.now]) }
+  scope :not_updated_recently, lambda {where(:updated_at => [1.year.ago..1.month.ago]) }
+  alias_attribute :display_name, :company_name
+  has_many :searches, class_name: "Searchjoy::Search", as: :convertable
+
+  searchkick callbacks: :async, suggest: [:industry], locations: ["location"],
+             conversions: ["unique_user_conversions", "total_conversions"],
+             index_name: "companies", similarity: "BM25", batch_size: 500
+
+def self.scrape
+  urls = Company.where(:note => "VALID").pluck(:website).uniq
+  urls.each do |url|
+    SpiderWorker.perform_async(url, 1, url)
+  end
+  puts "DONE"
+end
+
+  def self.scrub
+    user = User.first
+
+    find_each do |biz|
+      CompanyAnalysisWorker.perform_async(biz.id)
+      job_id = SecureRandom.hex(8)
+      SpiderWorker.perform_async(biz.website, user.id, job_id)
+    end
+  end
+
+
+  def keywords
+   self.description.to_s.split(" ").uniq
+  end
+
+  def self.to_csv(options = {})
+    csv_string = CSV.generate(options) do |csv|
+      csv << ["Company", "Industry", "Contact Name", "Contact Title", "City", "State", "Country", "Description"]
+      all.each do |record|
+         csv <<  [record.company_name, record.industry,  record.contact_person, record.contact_person_title, record.city, record.state, record.country, record.description]
+      end
+    end
+    csv_string
+  end
+
+
+ def search_data
+   as_json only: [:company_name, :address, :city, :state, :country, :website, :domain,
+    :contact_person, :contact_person_title, :important_people, :industry, :description]
+ end
+
+  def tags
+    self.description.split(" ").uniq
+  end
+
+  def score
+    missing = self.attributes.values.select(&:nil?).count
+    total = self.attributes.count
+    (((total.to_f - missing.to_f) / total.to_f) * 100).round(1)
+  end
+
+
+
+end
+

+ 0 - 0
engine/app/models/concerns/.keep


+ 91 - 0
engine/app/models/email_lead.rb

@@ -0,0 +1,91 @@
+require 'swot'
+class EmailLead < ActiveRecord::Base
+  validates_uniqueness_of :address, scope: :user_id
+  validate :mail_checker
+  belongs_to :website #, :foreign_key => :domain, :primary_key => :domain
+  belongs_to :user
+  after_create :validate_address
+  searchkick callbacks: :async #,  word_start: [:address, :domain], suggest: [:address] #, wordnet: true
+
+  # after_create :get_page_text
+  def get_page_text
+    ExtractWorker.perform_async("EmailLead", self.id) # unless Rails.env.development?
+  end
+
+  def validate_address
+      ValidationWorker.perform_async(self.address, self.user_id)
+  end
+
+  def mail_checker
+   MailChecker(self.address)
+  end
+
+
+  def score
+    missing = self.attributes.values.select(&:nil?).count
+    total = self.attributes.count
+    (((total.to_f - missing.to_f) / total.to_f) * 100).round(1)
+  end
+
+  def names
+    names = self.page_text.scan(/([A-Z][a-z]+(?=\s[A-Z])(?:\s[A-Z][a-z]+)+)/)
+    return names
+  end
+
+  def is_academic?
+    Swot::is_academic? self.address
+  end
+
+  def display_name
+    self.address
+  end
+
+  def self.to_csv(options = {})
+    csv_string = CSV.generate(options) do |csv|
+      csv << ["Email", "Tags", "Source", "Date"]
+      all.each do |record|
+        csv << [record.address, record.keywords, record.domain, record.created_at.to_date]
+      end
+    end
+    csv_string
+  end
+
+  def self.bulk_crawl(ids=[])
+
+    if ids.empty?
+      urls = Company.select(:website, :id).pluck(:website).uniq
+      else
+      urls = Company.select(:website, :id).find(ids).pluck(:website).uniq
+    end
+
+    user = User.last
+    puts ">> Loading #{urls.count} websites into crawler..."
+    urls.each do |url|
+      job_id = [user.id, "__", url].join
+      SpiderWorker.perform_async(url, user.id, job_id)
+    end
+    puts "-- #{urls.count} targets acquired --"
+  end
+
+  def email_type
+    if self.address[-3..-1] == "com"
+      type =  "Company"
+    elsif self.address[-3..-1] == "gov"
+      type =   "Government"
+    elsif self.address[-3..-1] == "org"
+      type =  "Non-Profit"
+    elsif self.address[-3..-1] == "edu"
+      type =  "Education"
+    elsif self.address[-3..-1] == "mil"
+      type =   "Military"
+    elsif self.address[-3..-1] == "net"
+      type =  "Technology"
+    else
+      type =  ""
+    end
+    type ||= self.domain[-1..-4]
+    return type
+  end
+
+end
+

+ 28 - 0
engine/app/models/face.rb

@@ -0,0 +1,28 @@
+# Face-analysis interface. Every method below is an unimplemented stub with
+# an empty body, so each currently returns nil.
+class Face
+
+  # Stub: not implemented.
+  def gender(image_url)
+
+  end
+
+  # Stub: not implemented.
+  def ethnicity(image_url)
+
+  end
+
+  # Stub: not implemented.
+  def age(image_url)
+
+  end
+
+  # Stub: not implemented.
+  def rating(image_url)
+
+  end
+
+  # Stub: not implemented.
+  def attractive?(image_url)
+
+  end
+
+  # Stub: takes two image URLs; not implemented.
+  def similar?(image_url_1, image_url_2)
+
+  end
+
+
+end

+ 19 - 0
engine/app/models/graph.rb

@@ -0,0 +1,19 @@
+# Graph interface. Every method below is an unimplemented stub with an empty
+# body, so each currently returns nil.
+class Graph
+
+  # Stub: takes two nodes; not implemented.
+  def link(node_1, node_2)
+
+  end
+
+  # Stub: not implemented.
+  def add(node)
+
+  end
+
+  # Stub: not implemented.
+  def del(node)
+
+  end
+
+  # Stub: not implemented.
+  def query(term)
+
+  end
+
+end

+ 87 - 0
engine/app/models/image_worker.rb

@@ -0,0 +1,87 @@
+
+class ImageWorker
+
+  def self.post(url, path, body={})
+    uri = URI.parse(url)
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new(path)
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+  def self.new_image_service
+    system("curl -X PUT 'http://#{ENV['API_HOST']}:8080/services/imageserv' -d '{\"mllib\":\"caffe\",\"description\":\"image classification service\",\"type\":\"supervised\",\"parameters\":{\"input\":{\"connector\":\"image\"},\"mllib\":{\"nclasses\":1000}},\"model\":{\"repository\":\"/opt/models/ggnet/\"}}'")
+  end
+
+
+  def self.predict_image(url)
+    body = {"service"=>"imageserv", "parameters"=>{"input"=>{"width"=>224, "height"=>224}, "output"=>{"best"=>3}}, "data"=>["#{url}"]}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/predict")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+  def self.train_image(url, tags=[])
+    body = {"service"=>"imageserv", "async"=>true, "parameters"=>{"mllib"=>{"gpu"=>false, "net"=>{"batch_size"=>32}, "solver"=>{"test_interval"=>500, "iterations"=>30000, "base_lr"=>0.001, "stepsize"=>1000, "gamma"=>0.9}}, "input"=>{"connector"=>"image", "test_split"=>0.1, "shuffle"=>true, "width"=>224, "height"=>224}, "output"=>{"measure"=>["acc", "mcll", "f1"]}}, "data"=>tags}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/train")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+  def self.ocr_image(url)
+    uri = URI.parse("http://#{ENV['API_HOST']}:9292")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/ocr")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  {:img_url => url, :worker => "tesseract"}.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+
+  def self.create_services
+    json = '{
+   "service":"imageserv",
+       "parameters":{
+         "mllib":{
+           "gpu":true
+         },
+         "input":{
+           "width":224,
+           "height":224
+         },
+         "output":{
+           "best":3,
+           "template":"{ {{#body}}{{#predictions}} \"uri\":\"{{uri}}\",\"categories\": [ {{#classes}} { \"category\":\"{{cat}}\",\"score\":{{prob}} } {{^last}},{{/last}}{{/classes}} ] {{/predictions}}{{/body}} }",
+           "network":{
+             "url":"your-elasticsearch-server.com/images/img",
+             "http_method":"POST"
+           }
+         }
+       },
+       "data":["http://i.ytimg.com/vi/0vxOhd4qlnA/maxresdefault.jpg"]
+     }'
+    result =   system("curl -XPOST 'http://localhost:8080/predict' -d #{json}")
+  end
+
+
+
+
+end

+ 6 - 0
engine/app/models/person.rb

@@ -0,0 +1,6 @@
+# A person record, searchable through searchkick and exposing +name+ as
+# +display_name+.
+class Person < ActiveRecord::Base
+  # NOTE(review): Company declares has_many :people keyed on :domain, while
+  # this side uses the default key -- confirm both ends agree.
+  belongs_to :company
+  alias_attribute :display_name, :name
+  searchkick callbacks: :async
+
+end

+ 110 - 0
engine/app/models/phone_lead.rb

@@ -0,0 +1,110 @@
+
+
+class PhoneLead < ActiveRecord::Base
+  validates_uniqueness_of :number, scope: :user_id
+  belongs_to :user
+  belongs_to :website #, :foreign_key => :domain, :primary_key => :domain
+  alias_attribute :display_name, :better_number
+  alias_attribute :location, :state
+  alias_attribute :website, :original_url
+  after_validation :geotag
+
+  scope :from_state, lambda {|state| where(:state => state) }
+  scope :from_country, lambda {|country| where(:country => country) }
+  scope :by_user, lambda {|user| where(:user_id => user) }
+  scope :by_domain, lambda {|domain| where(:domain => domain) }
+  # after_create :get_page_text
+  searchkick callbacks: :async
+
+  def get_page_text
+    ExtractWorker.perform_async("PhoneLead", self.id) # unless Rails.env.development?
+  end
+
+  def event_name
+    ["matched from ", self.domain].join
+  end
+
+  def tags
+    self.keywords.delete("[").delete("]").delete('"')
+  end
+
+  def icon
+    "phone"
+  end
+
+  def ux_color
+    "pink"
+  end
+
+  def fa_icon
+    "phone"
+  end
+
+  def display_name
+    self.better_number
+  end
+
+  def index_path
+    "/phone_leads"
+  end
+
+
+  def area_code
+    if self.number[0].to_s == "1"
+      area = self.number.to_s[1..3]
+    else
+      area = self.number.to_s[0..2]
+    end
+    area
+  end
+
+
+  def geotag
+
+    identifier = Phonelib.parse (self.number)
+
+    begin
+      state = Integer(self.area_code).to_region
+      self.location = state if state
+    rescue
+      # Ignore
+    end
+
+    if identifier
+      self.number_type = identifier.human_type
+      self.country = identifier.country
+      self.location ||= identifier.geo_name
+    else
+      self.destroy
+    end
+
+  end
+
+  def self.to_csv(options = {})
+    csv_string = CSV.generate(options) do |csv|
+      csv << ["Phone", "Tags", "Source", "Date"]
+      all.each do |record|
+        csv << [record.number, record.keywords, record.domain, record.created_at.to_date]
+      end
+    end
+    csv_string
+  end
+
+
+  def color
+    "orange"
+  end
+
+
+  def score
+    missing = self.attributes.values.select(&:nil?).count
+    total = self.attributes.count
+    (((total.to_f - missing.to_f) / total.to_f) * 100).round(1)
+  end
+
+  def better_number
+    self.number.phony_formatted(:normalize => :US, :spaces => '-')
+  end
+
+
+end

+ 86 - 0
engine/app/models/recon.rb

@@ -0,0 +1,86 @@
+require 'json'
+require 'uri'
+class Recon
+
+  def facts(entity)
+
+  end
+
+  def email(address)
+
+  end
+
+  def phone(number)
+
+  end
+
+  def profile(url)
+
+  end
+
+  def product(url)
+
+  end
+
+  def website(url)
+
+  end
+
+  def article(url)
+
+  end
+
+  def whois(domain)
+
+  end
+
+
+  def contacts(domain)
+    hash = {}
+    website = Website.where(:domain => domain).first
+    if website
+      emails = website.email_leads
+      phones = website.phone_leads
+      socials = website.social_leads
+      hash[:emails] = emails
+      hash[:social] = socials
+      hash[:phones] = phones
+    end
+    hash
+  end
+
+
+  def youtube(url)
+
+  end
+
+  def ograph_data(url)
+
+  end
+
+  def facebook(url)
+
+  end
+
+  def twitter(url)
+
+  end
+
+  def linkedin(url)
+
+  end
+
+  def instagram(url)
+
+  end
+
+  def pinterest(url)
+
+  end
+
+  def influence(username, network)
+
+  end
+
+
+end

+ 37 - 0
engine/app/models/resolver.rb

@@ -0,0 +1,37 @@
+
+require 'rubydns'
+require 'rubydns/system'
+class Resolver
+
+  def resolve(domains=[])
+
+    return false if domains.empty?
+
+    resolver = RubyDNS::Resolver.new(RubyDNS::System::nameservers)
+    dead = []
+    alive = []
+
+    EventMachine::run do
+
+     domains.each do |domain|
+          resolver.query(domain) do |response|
+            if response.answer.blank?
+              dead << domain
+            else
+              alive << domain
+            end
+          end
+     end
+
+     EventMachine::stop
+
+    end
+
+    results = {}
+    results[:dead] = dead
+    results[:alive] = alive
+    results
+
+  end
+end
+

+ 23 - 0
engine/app/models/score.rb

@@ -0,0 +1,23 @@
+# Scoring interface. Every method below is an unimplemented stub with an
+# empty body, so each currently returns nil.
+class Score
+
+  # Stub: not implemented.
+  def sentiment(text)
+
+  end
+
+  # Stub: not implemented.
+  def reading_level(text)
+
+  end
+
+  # Stub: takes two samples; not implemented.
+  def similarity(sample_1, sample_2)
+
+  end
+
+  # Stub: not implemented.
+  def face(image_url)
+
+  end
+
+  # Stub: not implemented.
+  def mood(text)
+
+  end
+
+end

+ 143 - 0
engine/app/models/social_lead.rb

@@ -0,0 +1,143 @@
+class SocialLead < ActiveRecord::Base
+
+  validates_uniqueness_of :profile_url, scope: :user_id
+  belongs_to :website #, :foreign_key => :domain, :primary_key => :domain
+  belongs_to :user
+
+  alias_attribute :display_name, :username
+  alias_attribute :location, :state
+
+
+  scope :by_user, lambda {|user| where(:user_id => user) }
+  scope :by_domain, lambda {|domain| where(:domain => domain) }
+  scope :is_facebook, lambda { where(:social_network => "facebook")}
+  scope :is_pinterest, lambda { where(:social_network => "pinterest")}
+  scope :is_twitter, lambda { where(:social_network => "twitter")}
+  scope :is_linkedin, lambda { where(:social_network => "linkedin")}
+  scope :is_instagram, lambda { where(:social_network => "instagram")}
+  scope :is_pinterest, lambda { where(:social_network => "pinterest")}
+  scope :is_google, lambda { where(:social_network => "google-plus")}
+  scope :is_okcupid, lambda { where(:social_network => "okcupid")}
+  scope :is_yelp, lambda { where(:social_network => "yelp")}
+  scope :is_github, lambda { where(:social_network => "github")}
+  scope :by_user_from_facebook, lambda {|user| where(:user_id => user, :social_network => "facebook") }
+  scope :by_user_from_twitter, lambda {|user| where(:user_id => user, :social_network => "twitter") }
+  scope :by_user_from_pinterest, lambda {|user| where(:user_id => user, :social_network => "pinterest") }
+  scope :by_user_from_github, lambda {|user| where(:user_id => user, :social_network => "github") }
+  scope :by_user_from_facebook, lambda {|user| where(:user_id => user, :social_network => "facebook") }
+  scope :from_state, lambda {|state| where(:state => state) }
+  scope :from_country, lambda {|country| where(:country => country) }
+  scope :from_network, lambda {|network| where(:social_network => network) }
+  scope :by_user_from_network, lambda {|user, network| where(:user_id => user, :social_network => network) }
+  searchkick callbacks: :async # ,  word_start: [:username, :domain], suggest: [:username] #, wordnet: true
+
+  # after_create :get_page_text
+  after_create :linkedin_scraper
+
+
+  def self.okcupid
+    file = File.open("/home/ubuntu/users.txt", "rb")
+    file.each_line do |line|
+      social = SocialLead.new(:username => line,
+                              :profile_url => "https://2-instant.okcupid.com/profile/#{line}",
+                              :source_url => "https://2-instant.okcupid.com/profile/#{line}",
+                              :social_network => "okcupid", :user_id => 1)
+      if social.save
+        puts social.id
+        ExtractWorker.perform_async("SocialLead", social.id)
+      end
+    end
+
+  end
+
+  def scrape_okc(usernames=[], user_id=1, filename="/home/ubuntu/users.txt")
+    unless filename.blank?
+      file = File.open(filename, "rb")
+      file.each_line do |line|
+        usernames << line
+      end
+    end
+
+    usernames.each do |username|
+      profile = SocialLead.new(:user_id => user_id, :social_network => "okcupid",
+                             :username=> username,
+                             :profile_url => "https://2-instant.okcupid.com/profile/#{username}",
+                             :source_url => "https://2-instant.okcupid.com/profile/#{username}")
+      if profile.save
+        ExtractWorker.perform_async("SocialLead", profile.id)
+      end
+    end
+
+  end
+
+  def display_name
+    "#{self.social_network}: #{self.username}"
+  end
+
+
+  def get_page_text
+    ExtractWorker.perform_async("SocialLead", self.id)
+  end
+
+  def tags
+    self.keywords.delete("[").delete("]").delete('"')
+  end
+
+  def icon
+    "hashtag"
+  end
+
+  def match_type
+    "social media profile"
+  end
+
+  def event_name
+    ["discovered on ", self.social_network].join
+  end
+
+  def ux_color
+    "purple"
+  end
+
+
+  def linkedin_scraper
+    if self.social_network == "linkedin" and Rails.env.production?
+      LinkedinWorker.perform_async(self.id)
+    end
+  end
+
+  def fa_icon
+    self.social_network
+  end
+
+  def index_path
+    "/social_leads"
+  end
+
+
+  def username
+   self.profile_url.split("/").last.downcase
+  end
+
+  def score
+    missing = self.attributes.values.select(&:nil?).count
+    total = self.attributes.count
+    (((total.to_f - missing.to_f) / total.to_f) * 100).round(1)
+  end
+
+  def color
+    "purple"
+  end
+
+  def self.to_csv(options = {})
+    csv_string = CSV.generate(options) do |csv|
+      csv << ["Username", "Social Network", "Tags", "Source", "Date"]
+      all.each do |record|
+        csv << [record.username, record.social_network, record.keywords, record.domain, record.created_at.to_date]
+      end
+    end
+    csv_string
+  end
+
+
+end

+ 68 - 0
engine/app/models/text_worker.rb

@@ -0,0 +1,68 @@
+
+class TextWorker
+  @@wl = WhatLanguage.new(:all)
+  @@tgr = EngTagger.new
+
+
+  def self.post(url, path, body={})
+    uri = URI.parse(url)
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new(path)
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+  def self.analyze_text(text)
+  if text
+    hash = {}
+    tagged = @@tgr.add_tags(text)
+    hash[:word_list] =  @@tgr.get_words(text)
+    hash[:nouns] = @@tgr.get_nouns(tagged)
+    hash[:proper_nouns] = @@tgr.get_proper_nouns(tagged)
+    hash[:past_tense_verbs] = @@tgr.get_past_tense_verbs(tagged)
+    hash[:adjectives] =  @@tgr.get_adjectives(tagged)
+    hash[:noun_phrases] = @@tgr.get_noun_phrases(tagged)
+    hash[:language] = @@wl.language(text)
+    hash[:languages_ranked] = @@wl.process_text(text)
+    hash[:profanity] = SadPanda.polarity (text)
+    hash[:emotion] = SadPanda.emotion (text)
+    hash[:reading_level] = Odyssey.coleman_liau (text)
+    return hash
+  else 
+    return false
+  end
+end
+
+
+def self.analyze_entities(text)
+  if text
+    entities = @@ner.perform(text)
+    if entities
+      return entities
+    else
+      return false
+    end
+  end
+end
+
+
+
+def self.analyze_name(first_name, last_name)
+  if first_name and last_name
+    hash = {}
+    hash[:gender] = Guess.gender(first_name.to_s.humanize)
+    hash[:ethnicity] = $races[last_name.to_s.upcase]
+    hash[:name] = [first_name, last_name].join(" ")
+    return hash
+  else
+    return false
+  end
+end
+
+
+
+end

+ 242 - 0
engine/app/models/user.rb

@@ -0,0 +1,242 @@
+
+class User < ActiveRecord::Base
+  include Redis::Objects
+
+  counter :api_credits, :start => 10000
+  counter :crawl_credits, :start => 150
+  counter :contact_credits, :start => 500
+  counter :mileage, :start => 0, :expiration => 1.minute
+
+  counter :active_engines, :start => 0, :expiration => 15.minutes
+  counter :bandwidth_used
+  counter :target_count
+  counter :search_count
+  counter :email_count
+  counter :phone_count
+  counter :social_count
+
+  set :job_ids
+  set :batch_ids
+  set :quicklist
+  set :email_addresses
+  set :phone_numbers
+  set :social_media_profiles
+
+  set :pages_crawled, :expiration => 1.minute
+  set :notifications, :expiration => 60.minutes
+
+  has_many :websites
+  has_many :email_leads,:through => :websites
+  has_many :phone_leads,:through => :websites
+  has_many :social_leads,:through => :websites
+
+  enum role: [:user, :admin, :trial, :basic, :advantage, :enterprise]
+
+
+
+
+  def available_engines
+    self.max_engines - self.active_engines.value
+  end
+
+
+  def add_to_prospects(record)
+    self.quicklist << record
+  end
+
+  def prospects(record)
+    self.quicklist.members
+  end
+
+
+  def total_contacts
+   (self.email_leads.count + self.phone_leads.count + self.social_leads.count)
+  end
+
+  def current_plan
+    if self.subscription and self.subscription.plan_id
+      self.subscription.plan_id
+    else
+      0
+    end
+  end
+
+  def max_targets
+    plan = self.current_plan
+    case plan
+      when 0
+        50
+      when 1
+        500
+      when 2
+        2000
+      when 3
+        10000
+      when 4
+        9999999999
+      else
+        50
+    end
+  end
+
+
+  def max_engines
+    plan = self.current_plan
+    case plan
+      when 0
+        4
+      when 1
+        8
+      when 2
+        16
+      when 3
+        32
+      else
+        4
+    end
+  end
+
+  def max_searches
+    plan = self.current_plan
+    case plan
+      when 0
+        5
+      when 1
+        250
+      when 2
+        1000
+      when 3
+        5000
+      when 4
+        9999999999
+      else
+        5
+    end
+  end
+
+
+  def max_pages
+    plan = self.current_plan
+    case plan
+      when 0
+        1000
+      when 1
+        20000
+      when 2
+        100000
+      when 3
+        1000000
+      when 4
+        10000000
+      else
+        1000
+    end
+  end
+
+  def max_validations
+    plan = self.current_plan
+    case plan
+      when 0
+        10
+      when 1
+        200
+      when 2
+        1000
+      when 3
+        10000
+      when 4
+        9999999999
+      else
+        10
+    end
+  end
+
+  def max_bandwidth
+    plan = self.current_plan
+    case plan
+      when 0
+        10000000
+      when 1
+        100000000
+      when 2
+        1000000000
+      when 3
+        10000000000
+      when 4
+        100000000000
+      else
+        10000000
+    end
+  end
+
+
+  def max_contacts
+    plan = self.current_plan
+    case plan
+      when 0
+        150
+      when 1
+        500
+      when 2
+        5000
+      when 3
+        10000
+      when 4
+        9999999999
+      else
+        150
+    end
+  end
+
+  def contacts
+    [self.email_leads, self.phone_leads, self.social_leads].flatten
+  end
+
+
+
+##########
+# BENCHMARK US!
+
+  def facebook_leads
+    self.social_leads.sort { |x| x.social_network == "facebook"}
+  end
+
+# VERSUS...
+
+  def twitter_leads
+    SocialLead.by_user(self.id).where(:social_network => "twitter")
+  end
+
+##########
+
+  def pinterest_leads
+    SocialLead.by_user(self.id).where(:social_network => "pinterest")
+  end
+
+
+  def linkedin_leads
+    SocialLead.by_user(self.id).where(:social_network => "linkedin")
+  end
+
+
+  def google_leads
+    SocialLead.by_user(self.id).where(:social_network => "google")
+  end
+
+  def github_leads
+    SocialLead.by_user(self.id).where(:social_network => "github")
+  end
+
+
+  def instagram_leads
+    SocialLead.by_user(self.id).where(:social_network => "instagram")
+  end
+
+
+  def new_record?
+    self.id.nil?
+  end
+
+
+
+end

+ 313 - 0
engine/app/models/web_worker.rb

@@ -0,0 +1,313 @@
+
+class WebWorker
+  @@validator = ValidationWorker.new
+
+
+  def self.validate_email_list(filepath)
+  domain="hushfling.com"
+  website = Website.find_or_initialize_by(:domain => domain, :user_id => 1)
+  website.save
+  counter = 0
+  file = File.open("/tmp/target_email_list.txt", "rb")
+    file.each do |line|
+      line.delete!("\n")
+      email = EmailLead.find_or_create_by(:address => line, :domain => "hushfling.com", :user_id => 1)
+      if email
+        puts email.id
+        puts email.address
+      end
+    end
+    puts counter
+    file.close
+  end
+
+  def self.post(url, path, body={})
+    uri = URI.parse(url)
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new(path)
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+  # Shells out to curl to PUT the "imageserv" service definition to
+  # ENV['API_HOST'] on port 8080. Returns whatever Kernel#system returns.
+  def self.new_image_service
+    system("curl -X PUT 'http://#{ENV['API_HOST']}:8080/services/imageserv' -d '{\"mllib\":\"caffe\",\"description\":\"image classification service\",\"type\":\"supervised\",\"parameters\":{\"input\":{\"connector\":\"image\"},\"mllib\":{\"nclasses\":1000}},\"model\":{\"repository\":\"/opt/models/ggnet/\"}}'")
+  end
+
+
+  def self.predict_image(url)
+    body = {"service"=>"imageserv", "parameters"=>{"input"=>{"width"=>224, "height"=>224}, "output"=>{"best"=>3}}, "data"=>["#{url}"]}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/predict")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+  def self.predict_location(ip)
+    body = {"service"=>"imageserv", "parameters"=>{"input"=>{"width"=>224, "height"=>224}, "output"=>{"best"=>3}}, "data"=>["#{url}"]}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/predict")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+
+    result = Curl.get("http://#{ENV['API_HOST']}/website/locate", {:url => "http://google.com"})
+    if result
+      response = result.body_str
+    end
+    expect(response).to be_truthy
+    expect(response).to have_content("CA")
+    puts response
+
+
+  end
+
+  def self.train_image(url, tags=[])
+    body = {"service"=>"imageserv", "async"=>true, "parameters"=>{"mllib"=>{"gpu"=>false, "net"=>{"batch_size"=>32}, "solver"=>{"test_interval"=>500, "iterations"=>30000, "base_lr"=>0.001, "stepsize"=>1000, "gamma"=>0.9}}, "input"=>{"connector"=>"image", "test_split"=>0.1, "shuffle"=>true, "width"=>224, "height"=>224}, "output"=>{"measure"=>["acc", "mcll", "f1"]}}, "data"=>tags}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/train")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+  def self.ocr_image(url)
+    uri = URI.parse("http://#{ENV['API_HOST']}:9292")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/ocr")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  {:img_url => url, :worker => "tesseract"}.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+def self.analyze_text(text)
+  if text
+    hash = {}
+    tagged = @@tgr.add_tags(text)
+    hash[:word_list] =  @@tgr.get_words(text)
+    hash[:nouns] = @@tgr.get_nouns(tagged)
+    hash[:proper_nouns] = @@tgr.get_proper_nouns(tagged)
+    hash[:past_tense_verbs] = @@tgr.get_past_tense_verbs(tagged)
+    hash[:adjectives] =  @@tgr.get_adjectives(tagged)
+    hash[:noun_phrases] = @@tgr.get_noun_phrases(tagged)
+    hash[:language] = @@wl.language(text)
+    hash[:languages_ranked] = @@wl.process_text(text)
+    hash[:profanity] = SadPanda.polarity (text)
+    hash[:emotion] = SadPanda.emotion (text)
+    hash[:reading_level] = Odyssey.coleman_liau (text)
+    return hash
+  else 
+    return false
+  end
+end
+
+
+
+
+def self.crawl(url, user_id)
+  job_id = SecureRandom.hex(8)
+  qid = SpiderWorker.perform_async(url, user_id, job_id)
+  if qid
+    return qid
+  else
+    return false
+  end
+end
+
+
+
+
+  def self.get_page_rank(url)
+     googlerank = GooglePageRank.get(url)
+    if googlerank
+      return googlerank
+    else
+      return false
+    end
+  end
+
+  
+
+def self.extract_product(url)
+  if url
+    hash = {}
+    product  = Fletcher.fetch url
+    hash[:product_name] = product.name # => "Avenir Deluxe Unicycle (20-Inch Wheel)"
+    hash[:description] = product.description
+  # hash[:image] = product.image.src || nil
+    hash[:price] = product.price
+    hash
+  else
+    return false
+  end
+end
+
+
+
+# Runs @@ner.perform on +text+.
+#
+# Returns the entities, false when the result is falsy, and nil when +text+
+# itself is nil/false.
+# NOTE(review): @@ner is not assigned in this class body -- confirm it is
+# initialised elsewhere.
+def self.extract_entities(text)
+  if text
+    entities = @@ner.perform(text)
+    if entities
+      return entities
+    else
+      return false
+    end
+  end
+end
+
+def self.check_email(email_address)
+  if email_address
+    resp = EmailVerifier.check(email_address)
+    if resp
+      return resp
+    else
+      return false
+    end
+  end
+end
+
+  def self.create_services
+    json = '{
+   "service":"imageserv",
+       "parameters":{
+         "mllib":{
+           "gpu":true
+         },
+         "input":{
+           "width":224,
+           "height":224
+         },
+         "output":{
+           "best":3,
+           "template":"{ {{#body}}{{#predictions}} \"uri\":\"{{uri}}\",\"categories\": [ {{#classes}} { \"category\":\"{{cat}}\",\"score\":{{prob}} } {{^last}},{{/last}}{{/classes}} ] {{/predictions}}{{/body}} }",
+           "network":{
+             "url":"your-elasticsearch-server.com/images/img",
+             "http_method":"POST"
+           }
+         }
+       },
+       "data":["http://i.ytimg.com/vi/0vxOhd4qlnA/maxresdefault.jpg"]
+     }'
+  result =   system("curl -XPOST 'http://localhost:8080/predict' -d #{json}")
+  end
+
+
+def self.validate_email(email_address, user_id)
+  resp = @@validator.perform(email_address, user_id)
+  if resp
+    return resp
+  else
+    return false
+  end
+end
+
+
+def self.analyze_email(email_address)
+  if email_address
+    hash = {}
+    email_domain = email_address.to_s.split("@").last
+    school = Swot::school_name email_address
+    govt_domain = Gman.new email_address
+    hash[:domain] = email_domain
+    if school
+      hash[:academia] ||= school
+    end
+    if govt_domain
+      hash[:govt_agency] = govt_domain.agency
+      # hash[:domain] ||= govt_domain.domain
+      hash[:is_govt] = govt_domain.federal?
+      hash[:academia] ||= false
+    end
+    return hash
+  else
+    return false
+  end
+end
+
+def self.analyze_phone(phone_number)
+  if phone_number
+    hash = {}
+    identifier = Phonelib.parse(phone_number)
+    hash[:number] = phone_number.phony_formatted(:normalize => :US, :spaces => '-')
+    if phone_number[0].to_s == "1"
+      area = phone_number.to_s[1..3]
+    else
+      area = phone_number.to_s[0..2]
+    end
+    hash[:region] = Integer(area).to_region(:city => true)
+    hash[:type] = identifier.human_type
+    hash[:country] = identifier.country
+    hash[:location] = identifier.geo_name
+    return hash
+  else
+    return false
+  end
+end
+
+
+def self.analyze_name(first_name, last_name)
+  if first_name and last_name
+    hash = {}
+    hash[:gender] = Guess.gender(first_name.to_s.humanize)
+    hash[:ethnicity] = $races[last_name.to_s.upcase]
+    hash[:name] = [first_name, last_name].join(" ")
+    return hash
+  else
+    return false
+  end
+end
+
+def self.analyze_domain(domain_name)
+  if domain_name
+    url = "http://#{domain_name}"
+    hash = {}
+    doc = Pismo::Document.new url
+    whois_data = Whois.whois(domain_name)
+    googlerank = GooglePageRank.get(url)
+    meta = MetaInspector.new(url)
+    if doc and doc.title
+      hash[:title] = doc.title
+      hash[:author] = doc.author
+      hash[:meta_keywords] = doc.keywords
+      hash[:meta_description] =  doc.description
+    end
+    if whois_data
+      hash[:whois] = whois_data
+    end
+    if googlerank
+      hash[:google_links] = googlerank.to_s
+    end
+    if meta
+      hash[:meta] = meta.to_hash.to_s
+    end
+    return hash
+  else
+    return false
+  end
+end
+
+
+def self.social_shares(social_media_url)
+  if social_media_url
+    result = SocialShares.all social_media_url
+    return result
+  else
+    return false
+  end
+end
+
+end

+ 52 - 0
engine/app/models/website.rb

@@ -0,0 +1,52 @@
+class Website < ActiveRecord::Base
+  validates_uniqueness_of :domain, scope: :user_id
+  belongs_to :user
+
+  has_many :email_leads #, :foreign_key => :domain, :primary_key => :domain
+  has_many :phone_leads #, :foreign_key => :domain, :primary_key => :domain
+  has_many :social_leads #, :foreign_key => :domain, :primary_key => :domain
+  has_many :companies, :foreign_key => :domain, :primary_key => :domain
+  scope :by_user, lambda {|user| where(:user_id => user) }
+  scope :by_domain, lambda {|domain| where(:domain => domain) }
+  scope :is_active, lambda {  where(:is_active => true) }
+  scope :is_inactive, lambda {  where(:is_active => false) }
+  searchkick callbacks: :async
+ # after_create :whois_me
+
+  def locate_me
+    ip = IPSocket::getaddress(self.domain)
+    result = Curl.get("http://#{ENV['API_HOST']}:8882/locate?ip=#{ip}")
+    if result
+      self.update(:location => result.body_str)
+    end
+  end
+
+
+  def self.import(file)
+    spreadsheet = open_spreadsheet(file)
+    header = spreadsheet.row(1)
+    (2..spreadsheet.last_row).each do |i|
+      row = Hash[[header, spreadsheet.row(i)].transpose]
+      website = find_by_id(row["id"]) || new
+      website.attributes = row.to_hash.slice(*accessible_attributes)
+      website.save!
+    end
+  end
+
+  def self.open_spreadsheet(file)
+    case File.extname(file.original_filename)
+      when ".csv" then Csv.new(file.path, nil, :ignore)
+      when ".xls" then Excel.new(file.path, nil, :ignore)
+      when ".xlsx" then Excelx.new(file.path, nil, :ignore)
+      else raise "Unknown file type: #{file.original_filename}"
+    end
+  end
+
+
+
+  def whois_me
+      WhoisWorker.perform_async(self.domain)
+  end
+end
+
+

+ 313 - 0
engine/app/models/worker.rb

@@ -0,0 +1,313 @@
+
+class Worker
+  @@wl = WhatLanguage.new(:all)
+  @@tgr = EngTagger.new
+  @@validator = ValidationWorker.new
+
+
+  def self.validate_email_list(filepath)
+  domain="hushfling.com"
+  website = Website.find_or_initialize_by(:domain => domain, :user_id => 1)
+  website.save
+  counter = 0
+  file = File.open("/tmp/target_email_list.txt", "rb")
+    file.each do |line|
+      line.delete!("\n")
+      email = EmailLead.find_or_create_by(:address => line, :domain => "hushfling.com", :user_id => 1)
+      if email
+        puts email.id
+        puts email.address
+      end
+    end
+    puts counter
+    file.close
+  end
+
+
+  def self.post(url, path, body={})
+    uri = URI.parse(url)
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new(path)
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+  # Registers the "imageserv" service by shelling out to curl with a PUT
+  # against http://<API_HOST>:8081/services/imageserv. The JSON payload
+  # selects the caffe mllib, the supervised type, an image input connector,
+  # 1000 classes and the /opt/models/ggnet/ model repository.
+  #
+  # Returns the value of Kernel#system for the curl command.
+  # NOTE(review): the predict/train calls in this class use port 8080 while
+  # this one uses 8081 - confirm that is intended.
+  def self.new_image_service
+    system("curl -X PUT 'http://#{ENV['API_HOST']}:8081/services/imageserv' -d '{\"mllib\":\"caffe\",\"description\":\"image classification service\",\"type\":\"supervised\",\"parameters\":{\"input\":{\"connector\":\"image\"},\"mllib\":{\"nclasses\":1000}},\"model\":{\"repository\":\"/opt/models/ggnet/\"}}'")
+  end
+
+
+  def self.predict_image(url)
+    body = {"service"=>"imageserv", "parameters"=>{"input"=>{"width"=>224, "height"=>224}, "output"=>{"best"=>3}}, "data"=>["#{url}"]}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/predict")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+  def self.predict_location(ip)
+    body = {"service"=>"imageserv", "parameters"=>{"input"=>{"width"=>224, "height"=>224}, "output"=>{"best"=>3}}, "data"=>["#{url}"]}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/predict")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+    result = Curl.get("http://#{ENV['API_HOST']}/website/locate", {:url => "http://google.com"})
+    if result
+      response = result.body_str
+    end
+    expect(response).to be_truthy
+    expect(response).to have_content("CA")
+    puts response
+  end
+
+  def self.train_image(url, tags=[])
+    body = {"service"=>"imageserv", "async"=>true, "parameters"=>{"mllib"=>{"gpu"=>false, "net"=>{"batch_size"=>32}, "solver"=>{"test_interval"=>500, "iterations"=>30000, "base_lr"=>0.001, "stepsize"=>1000, "gamma"=>0.9}}, "input"=>{"connector"=>"image", "test_split"=>0.1, "shuffle"=>true, "width"=>224, "height"=>224}, "output"=>{"measure"=>["acc", "mcll", "f1"]}}, "data"=>tags}
+    uri = URI.parse("http://#{ENV['API_HOST']}:8080")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/train")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  body.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+  def self.ocr_image(url)
+    uri = URI.parse("http://#{ENV['API_HOST']}:9292")
+    http = Net::HTTP.new(uri.host, uri.port)
+    request = Net::HTTP::Post.new("/ocr")
+    request.add_field('Content-Type', 'application/json')
+    request.body =  {:img_url => url, :worker => "tesseract"}.to_json
+    response = http.request(request)
+    response.body
+  end
+
+
+
+def self.analyze_text(text)
+  if text
+    hash = {}
+    tagged = @@tgr.add_tags(text)
+    hash[:word_list] =  @@tgr.get_words(text)
+    hash[:nouns] = @@tgr.get_nouns(tagged)
+    hash[:proper_nouns] = @@tgr.get_proper_nouns(tagged)
+    hash[:past_tense_verbs] = @@tgr.get_past_tense_verbs(tagged)
+    hash[:adjectives] =  @@tgr.get_adjectives(tagged)
+    hash[:noun_phrases] = @@tgr.get_noun_phrases(tagged)
+    hash[:language] = @@wl.language(text)
+    hash[:languages_ranked] = @@wl.process_text(text)
+    hash[:profanity] = SadPanda.polarity (text)
+    hash[:emotion] = SadPanda.emotion (text)
+    hash[:reading_level] = Odyssey.coleman_liau (text)
+    return hash
+  else 
+    return false
+  end
+end
+
+
+
+
+def self.crawl(url, user_id)
+  job_id = SecureRandom.hex(8)
+  qid = SpiderWorker.perform_async(url, user_id, job_id)
+  if qid
+    return qid
+  else
+    return false
+  end
+end
+
+
+
+
+  def self.get_page_rank(url)
+     googlerank = GooglePageRank.get(url)
+    if googlerank
+      return googlerank
+    else
+      return false
+    end
+  end
+
+  
+
+def self.extract_product(url)
+  if url
+    hash = {}
+    product  = Fletcher.fetch url
+    hash[:product_name] = product.name # => "Avenir Deluxe Unicycle (20-Inch Wheel)"
+    hash[:description] = product.description
+  # hash[:image] = product.image.src || nil
+    hash[:price] = product.price
+    hash
+  else
+    return false
+  end
+end
+
+
+
+# Runs the @@ner object's #perform over +text+.
+#
+# Returns the entities it yields, false when the result is falsy, and nil
+# when +text+ itself is nil/false.
+#
+# NOTE(review): this class only initialises @@wl, @@tgr and @@validator;
+# @@ner is not assigned anywhere in the class as shown, so this call looks
+# like it raises NameError (uninitialized class variable) - confirm where
+# the entity recogniser is meant to come from.
+def self.extract_entities(text)
+  if text
+    entities = @@ner.perform(text)
+    if entities
+      return entities
+    else
+      return false
+    end
+  end
+end
+
+def self.check_email(email_address)
+  if email_address
+    resp = EmailVerifier.check(email_address)
+    if resp
+      return resp
+    else
+      return false
+    end
+  end
+end
+
+  def self.create_services
+    json = '{
+   "service":"imageserv",
+       "parameters":{
+         "mllib":{
+           "gpu":true
+         },
+         "input":{
+           "width":224,
+           "height":224
+         },
+         "output":{
+           "best":3,
+           "template":"{ {{#body}}{{#predictions}} \"uri\":\"{{uri}}\",\"categories\": [ {{#classes}} { \"category\":\"{{cat}}\",\"score\":{{prob}} } {{^last}},{{/last}}{{/classes}} ] {{/predictions}}{{/body}} }",
+           "network":{
+             "url":"your-elasticsearch-server.com/images/img",
+             "http_method":"POST"
+           }
+         }
+       },
+       "data":["http://i.ytimg.com/vi/0vxOhd4qlnA/maxresdefault.jpg"]
+     }'
+  result =   system("curl -XPOST 'http://localhost:8080/predict' -d #{json}")
+  end
+
+
+def self.validate_email(email_address, user_id)
+  resp = @@validator.perform(email_address, user_id)
+  if resp
+    return resp
+  else
+    return false
+  end
+end
+
+
+def self.analyze_email(email_address)
+  if email_address
+    hash = {}
+    email_domain = email_address.to_s.split("@").last
+    school = Swot::school_name email_address
+    govt_domain = Gman.new email_address
+    hash[:domain] = email_domain
+    if school
+      hash[:academia] ||= school
+    end
+    if govt_domain
+      hash[:govt_agency] = govt_domain.agency
+      # hash[:domain] ||= govt_domain.domain
+      hash[:is_govt] = govt_domain.federal?
+      hash[:academia] ||= false
+    end
+    return hash
+  else
+    return false
+  end
+end
+
+def self.analyze_phone(phone_number)
+  if phone_number
+    hash = {}
+    identifier = Phonelib.parse(phone_number)
+    hash[:number] = phone_number.phony_formatted(:normalize => :US, :spaces => '-')
+    if phone_number[0].to_s == "1"
+      area = phone_number.to_s[1..3]
+    else
+      area = phone_number.to_s[0..2]
+    end
+    hash[:region] = Integer(area).to_region(:city => true)
+    hash[:type] = identifier.human_type
+    hash[:country] = identifier.country
+    hash[:location] = identifier.geo_name
+    return hash
+  else
+    return false
+  end
+end
+
+
+def self.analyze_name(first_name, last_name)
+  if first_name and last_name
+    hash = {}
+    hash[:gender] = Guess.gender(first_name.to_s.humanize)
+    hash[:ethnicity] = $races[last_name.to_s.upcase]
+    hash[:name] = [first_name, last_name].join(" ")
+    return hash
+  else
+    return false
+  end
+end
+
+def self.analyze_domain(domain_name)
+  if domain_name
+    url = "http://#{domain_name}"
+    hash = {}
+    doc = Pismo::Document.new url
+    whois_data = Whois.whois(domain_name)
+    googlerank = GooglePageRank.get(url)
+    meta = MetaInspector.new(url)
+    if doc and doc.title
+      hash[:title] = doc.title
+      hash[:author] = doc.author
+      hash[:meta_keywords] = doc.keywords
+      hash[:meta_description] =  doc.description
+    end
+    if whois_data
+      hash[:whois] = whois_data
+    end
+    if googlerank
+      hash[:google_links] = googlerank.to_s
+    end
+    if meta
+      hash[:meta] = meta.to_hash.to_s
+    end
+    return hash
+  else
+    return false
+  end
+end
+
+
+def self.social_shares(social_media_url)
+  if social_media_url
+    result = SocialShares.all social_media_url
+    return result
+  else
+    return false
+  end
+end
+
+end

+ 39 - 0
engine/app/workers/bulk_extract_worker.rb

@@ -0,0 +1,39 @@
+require "tika/app"
+class BulkExtractWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker
+  include Sidekiq::Benchmark::Worker
+
+  sidekiq_options :queue => 'default', :retry => false, :backtrace => true, expires_in: 1.hour
+
+  def perform(source_urls=[], user_id)
+    benchmark.bulk_extraction_metric do
+      total source_urls.count
+      source_urls.each do |src_url|
+        social_records = SocialLead.where(:user_id => user_id, :source_url => src_url)
+        phone_records = PhoneLead.where(:user_id => user_id, :source_url => src_url)
+        email_records = EmailLead.where(:user_id => user_id, :source_url => src_url)
+        records = [social_records,phone_records,email_records].flatten
+        if records
+          resource = Tika::Resource.new(src_url)
+          if resource and resource.text
+            counter = 0
+             records.each do |record|
+               counter += 1
+               at counter, ">> #{record.source_url}"
+               record.update(:page_text => resource.text)
+            end
+          end
+        end
+      end
+    end
+    benchmark.finish
+  end
+end
+
+
+
+
+
+
+

+ 25 - 0
engine/app/workers/company_analysis_worker.rb

@@ -0,0 +1,25 @@
+class CompanyAnalysisWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  def perform(id)
+    company = Company.find(id)
+    if company
+      meta = MetaInspector.new(company.website, :allow_redirections => false, faraday_options: { ssl: { verify: false } }, :connection_timeout => 5, :read_timeout => 5, :retries => 0)
+      if meta and meta.images and meta.images.best
+        company.image_url = meta.images.best
+        company.save
+      end
+    end
+  end
+
+  def scrape(id)
+    company = Company.find(id)
+    if company
+        job_id = ["scrape_company_", company.id, "_job_", SecureRandom.hex(8)].join
+        SpiderWorker.perform_async(company.website, 1, job_id)
+    end
+  end
+
+end
+

+ 16 - 0
engine/app/workers/company_bulk_validator_worker.rb

@@ -0,0 +1,16 @@
+class CompanyBulkValidatorWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true, :queue => 'evil'
+
+  def perform(domain)
+    companies = Company.where(:domain => domain)
+    if companies
+        companies.each do |company|
+          company.update(:note => "IAMOK")
+      end
+    end
+    end
+end
+
+
+

+ 22 - 0
engine/app/workers/dark_worker.rb

@@ -0,0 +1,22 @@
+class DarkWorker
+  include Sidekiq::Worker
+  sidekiq_options   :retry => true, :backtrace => true
+
+  def perform(email_address)
+
+      unless email_address.match("gmail.com") or email_address.match("yahoo.com") or email_address.match("hotmail.com") or email_address.match("msn.com") or email_address.match("aol.com")
+          checker = ["ubuntu", $public_hostname].join("@")
+          validator = EmailAuthentication::Base.new
+          result = validator.check(email_address, checker)
+          if result
+            email = EmailLead.find_or_initialize_by(:address => email_address, :user_id => 1)
+            email.smtp_reply = result[1].to_s.first(255)
+            email.is_valid = true
+            email.save
+          end
+      end
+    end
+  end
+
+
+

+ 39 - 0
engine/app/workers/dns_worker.rb

@@ -0,0 +1,39 @@
+
+require 'rubydns'
+require 'rubydns/system'
+class DnsWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  def perform(domains=[])
+
+    return false if domains.blank?
+
+    resolver = RubyDNS::Resolver.new(RubyDNS::System::nameservers)
+    dead = []
+    alive = []
+
+    EventMachine::run do
+
+      domains.each do |domain|
+        resolver.query(domain) do |response|
+          if response.answer.blank?
+            dead << domain
+          else
+            alive << domain
+          end
+        end
+      end
+
+      EventMachine::stop
+
+    end
+
+    results = {}
+    results[:dead] = dead
+    results[:alive] = alive
+    results
+
+  end
+end
+

+ 38 - 0
engine/app/workers/extract_worker.rb

@@ -0,0 +1,38 @@
+require "tika/app"
+class ExtractWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker
+  include Sidekiq::Benchmark::Worker
+
+  sidekiq_options :queue => 'default', :retry => false, :backtrace => true, expires_in: 1.hour
+
+  def perform(klass, id)
+    benchmark.extraction_metric do
+      if klass == "EmailLead"
+        record = EmailLead.find(id)
+      elsif klass == "PhoneLead"
+        record = PhoneLead.find(id)
+      elsif klass == "SocialLead"
+        record = SocialLead.find(id)
+      end
+
+      if record
+        resource = Tika::Resource.new(record.source_url)
+        if resource
+          text = resource.text
+          if text
+            record.update(:page_text => text)
+          end
+        end
+      end
+    end
+    benchmark.finish
+  end
+end
+
+
+
+
+
+
+

+ 28 - 0
engine/app/workers/geotag_worker.rb

@@ -0,0 +1,28 @@
+class GeotagWorker
+  include Sidekiq::Worker
+  sidekiq_options  :retry => true, :backtrace => true #, expires_in: 1.day
+  def perform(id)
+    record = PhoneLead.find(id)
+    identifier = Phonelib.parse (record.number)
+
+    begin
+      state = Integer(record.area_code).to_region
+      if state
+        record.location = state
+      end
+    rescue
+      # Ignore
+    end
+
+    if identifier
+      record.number_type = identifier.human_type
+      record.country = identifier.country
+      record.state ||= identifier.geo_name
+    end
+
+    record.save
+
+  end
+
+end
+

+ 36 - 0
engine/app/workers/instagram_worker.rb

@@ -0,0 +1,36 @@
+# Sidekiq job that copies public LinkedIn profile fields onto a SocialLead.
+#
+# NOTE(review): this file is listed as instagram_worker.rb but defines
+# LinkedinWorker, the same class name and body as linkedin_worker.rb, and
+# nothing here is Instagram-specific - confirm whether an InstagramWorker
+# was intended or the file should be removed.
+class LinkedinWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  # id - primary key of the SocialLead whose profile_url is scraped.
+  def perform(id)
+
+    record = SocialLead.find(id)
+    if record
+    profile = Linkedin::Profile.new(record.profile_url) # , { company_details: true, open_timeout: 30, proxy_ip: '127.0.0.1', proxy_port: '3128', username: 'user', password: 'pass' })
+      if profile
+        record.first_name = profile.first_name          # The first name of the contact
+        record.last_name = profile.last_name           # The last name of the contact
+        record.description = profile.summary      # The summary of the profile
+        record.location = profile.location            # The location of the contact
+        record.country = profile.country             # The country of the contact
+        record.image_url = profile.picture             # The profile picture link of profile
+        record.name = profile.name
+
+       # record.title = profile.title               # The full name of the profile
+
+
+       # record.industry = profile.industry            # The domain for which the contact belongs
+       # record.skills = profile.skills.to_s            # Array of skills of the profile
+       #  record.organization = profile.organizations.to_s     # Array organizations of the profile
+       # record.education = profile.education.to_s      # Array of hashes for education
+       # record.websites = profile.websites.to_s         # Array of websites
+       # record.interests = profile.groups.to_s              # Array of groups
+       # record.followers = profile.number_of_connections # The number of connections as a string
+        record.save
+
+      end
+    end
+
+
+  end
+end

+ 91 - 0
engine/app/workers/lead_worker.rb

@@ -0,0 +1,91 @@
+class LeadWorker
+  include Sidekiq::Worker
+  include Sidekiq::Benchmark::Worker
+  sidekiq_options :queue => 'default', :retry => false, :backtrace => true# , expires_in: 3.days
+
+  def perform(addresses, numbers, profiles, user_id, website_id, domain)
+    unless addresses.blank? and numbers.blank? and profiles.blank?
+      source_urls = []
+
+      if profiles
+        profiles.each do |profile|
+          source_urls << profile[1]
+        end
+
+        social_batch = Sidekiq::Batch.new
+        social_batch.description =  "user_#{user_id}__#{domain}_social_leads"
+        benchmark.social_batch_jobs_metric do
+          social_batch.jobs do
+          profiles.each do |profile|
+            username = profile[0].split("/").last
+            if profile[0].match("google")
+              network = "google-plus"
+            end
+            network ||= profile[0].split("//").last.split(".com").first
+            new_social =  SocialLead.create(:website_id => website_id,
+                                            :social_network => network,
+                                            :username => username,
+                                            :profile_url => profile[0],
+                                            :source_url => profile[1],
+                                            :user_id => user_id,
+                                            :domain => domain)
+            if new_social and new_social.id
+              PostSpiderWorker.perform_async(new_social.id, "SocialLead")
+              TextAnalysisWorker.perform_async("SocialLead", new_social.id)
+            end
+          end
+        end
+        end
+
+      end
+      if numbers
+        numbers.each do |num|
+          source_urls << num[1]
+        end
+        phone_batch = Sidekiq::Batch.new
+        phone_batch.description =  "user_#{user_id}__#{domain}_phone_leads"
+        benchmark.phone_batch_jobs_metric do
+          phone_batch.jobs do
+          numbers.each do |number|
+            new_phone = PhoneLead.create(:number => number[0],
+                                         :website_id => website_id,
+                                         :user_id => user_id,
+                                         :domain => domain,
+                                         :source_url => number[1])
+            if new_phone and new_phone.id
+              PostSpiderWorker.perform_async(new_phone.id, "PhoneLead")
+              TextAnalysisWorker.perform_async("PhoneLead", new_phone.id)
+            end
+          end
+          end
+        end
+
+      end
+      if addresses
+        addresses.each do |address|
+          source_urls << address[1]
+        end
+        address_batch = Sidekiq::Batch.new
+        address_batch.description = "user_#{user_id}_#{domain}__email_leads"
+        benchmark.email_batch_jobs_metric do
+          address_batch.jobs do
+          addresses.each do |address|
+            new_email = EmailLead.create(:website_id => website_id,
+                                         :address => address[0],
+                                         :user_id => user_id,
+                                         :source_url => address[1],
+                                         :domain => domain)
+            if new_email and new_email.id
+              PostSpiderWorker.perform_async(new_email.id, "EmailLead")
+              TextAnalysisWorker.perform_async("EmailLead", new_email.id)
+            end
+          end
+          end
+        end
+      end
+      source_urls.uniq!
+      BulkExtractWorker.perform_async(source_urls, user_id)
+      benchmark.finish
+    end
+  end
+end

+ 38 - 0
engine/app/workers/linkedin_worker.rb

@@ -0,0 +1,38 @@
+class LinkedinWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  def perform(id)
+
+    record = SocialLead.find(id)
+    if record
+    profile = Linkedin::Profile.new(record.profile_url) # , { company_details: true, open_timeout: 30, proxy_ip: '127.0.0.1', proxy_port: '3128', username: 'user', password: 'pass' })
+      if profile
+        record.first_name = profile.first_name          # The first name of the contact
+        record.last_name = profile.last_name           # The last name of the contact
+        record.description = profile.summary      # The summary of the profile
+        record.location = profile.location            # The location of the contact
+        record.country = profile.country             # The country of the contact
+        record.image_url = profile.picture             # The profile picture link of profile
+        record.name = profile.name
+
+       # record.title = profile.title               # The full name of the profile
+
+
+       # record.industry = profile.industry            # The domain for which the contact belongs
+       # record.skills = profile.skills.to_s            # Array of skills of the profile
+       #  record.organization = profile.organizations.to_s     # Array organizations of the profile
+       # record.education = profile.education.to_s      # Array of hashes for education
+       # record.websites = profile.websites.to_s         # Array of websites
+       # record.interests = profile.groups.to_s              # Array of groups
+       # record.followers = profile.number_of_connections # The number of connections as a string
+        record.save
+
+      end
+    end
+
+
+  end
+end
+
+

+ 92 - 0
engine/app/workers/namemail_worker.rb

@@ -0,0 +1,92 @@
+class NamemailWorker
+  include Sidekiq::Worker
+  sidekiq_options   :queue => 'validation', :retry => false, :backtrace => true #, expires_in: 1.day
+
+  def perform(first, last, domain)
+    checker = ["hello", $public_hostname].join("@")
+    validator = EmailAuthentication::Base.new
+    combos = self.permutate(first,last,domain)
+    combos.each do |possible_email|
+      result = validator.check(possible_email, checker)
+      if result and result[0]
+        email = EmailLead.new(:address => possible_email, :user_id => 1, :domain => domain)
+        email.smtp_reply = result[1].to_s.first(255)
+        email.is_valid = true
+        email.save
+        puts "#{email.address} is valid!"
+      end
+    end
+  end
+
+
+  def permutate(first_name, last_name, domain)
+
+    first_initial = first_name[0]
+    last_initial = last_name[0]
+
+    # Define each name permutation manually
+    name_permutations = <<PERMS
+{first_name}
+{last_name}
+{first_initial}
+{last_initial}
+{first_name}{last_name}
+{first_name}.{last_name}
+{first_initial}{last_name}
+{first_initial}.{last_name}
+{first_name}{last_initial}
+{first_name}.{last_initial}
+{first_initial}{last_initial}
+{first_initial}.{last_initial}
+{last_name}{first_name}
+{last_name}.{first_name}
+{last_name}{first_initial}
+{last_name}.{first_initial}
+{last_initial}{first_name}
+{last_initial}.{first_name}
+{last_initial}{first_initial}
+{last_initial}.{first_initial}
+{first_name}-{last_name}
+{first_initial}-{last_name}
+{first_name}-{last_initial}
+{first_initial}-{last_initial}
+{last_name}-{first_name}
+{last_name}-{first_initial}
+{last_initial}-{first_name}
+{last_initial}-{first_initial}
+{first_name}_{last_name}
+{first_initial}_{last_name}
+{first_name}_{last_initial}
+{first_initial}_{last_initial}
+{last_name}_{first_name}
+{last_name}_{first_initial}
+{last_initial}_{first_name}
+{last_initial}_{first_initial}
+PERMS
+
+    # substitutions to get all permutations to an Array
+    name_permutations = name_permutations.gsub('{first_name}', first_name)
+                            .gsub('{last_name}', last_name)
+                            .gsub('{first_initial}', first_initial)
+                            .gsub('{last_initial}', last_initial)
+                            .split($/)
+
+    # accept domain arg to be a string or an array
+    # %40 => @
+    if domain.is_a? String
+      domain = ['@'].product domain.split
+    elsif domain.is_a? Array
+      domain = ['@'].product domain
+    else
+      raise ArgumentError, 'Domain was neither a String or Array'
+    end
+
+    name_and_domains = name_permutations.product domain
+
+    # combine names and domains
+    # return permuations
+    permutations = name_and_domains.map {|email| email.join }
+    permutations
+  end
+
+end

+ 16 - 0
engine/app/workers/ocr_worker.rb

@@ -0,0 +1,16 @@
+class OcrWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  def perform(image_url)
+      image_text = Curl.post("http://#{ENV['API_HOST']}:8008/ocr", {:img_url => image_url})
+      if image_text and image_text.body
+        entities = Curl.post("http://#{ENV['API_HOST']}:5000/api", {:text => image_text.body, :worker => "tesseract"})
+        if entities
+          return entities.body
+        end
+      end
+  end
+
+end
+

+ 156 - 0
engine/app/workers/okcupid_worker.rb

@@ -0,0 +1,156 @@
+class OkcupidWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker
+  include Sidekiq::Benchmark::Worker
+
+  sidekiq_options :queue => 'okcupid', :retry => false, :backtrace => true
+
+
+  def perform(usernames=[])
+      urls = []
+      @profiles = []
+      @addresses = []
+      @numbers = []
+
+      @current_user = User.find(10)
+      usernames.each do |username|
+        urls << "https://2-instant.okcupid.com/profile/#{username}"
+      end
+
+      @logger = Logger.new(STDOUT)
+      @storage = nil
+      @options = {
+          :redis_options => {
+              :host => 'localhost',
+              :driver => 'hiredis',
+              :db => 11},
+          :depth_limit => 2,
+          :discard_page_bodies => false,
+          # HTTP read timeout in seconds
+          :read_timeout => 30,
+          # HTTP open connection timeout in seconds
+          :open_timeout => 10,
+          :obey_robots_txt => false,
+          :logger => @logger,
+          :skip_query_strings => false,
+          :user_agent => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9) AppleWebKit/537.71 (KHTML, like Gecko) Version/7.0 Safari/537.71",
+          :enable_signal_handler => false,
+          :workers => 5,
+          :redirect_limit => 2,
+          :storage => @storage
+      }
+ 
+       Polipus.crawler(job_id, urls, @options) do |crawler|
+
+        crawler.skip_links_like(/\/versions\//)
+        crawler.skip_links_like(/\.pdf$/)
+        crawler.skip_links_like(/\.zip$/)
+        crawler.skip_links_like(/\.jpg$/)
+        crawler.skip_links_like(/\.png$/)
+        crawler.skip_links_like(/\.PDF$/)
+        crawler.skip_links_like(/\.JPG$/)
+        crawler.skip_links_like(/\.PNG$/)
+        crawler.skip_links_like(/\.GIF$/)
+        crawler.skip_links_like(/\.EXE$/)
+        crawler.skip_links_like(/\.gif$/)
+        crawler.skip_links_like(/\.exe$/)
+        crawler.skip_links_like(/\.mpg$/)
+        crawler.skip_links_like(/\.avi$/)
+        crawler.skip_links_like(/\.mp4$/)
+        crawler.skip_links_like(/\.mpeg$/)
+        crawler.skip_links_like(/\/images\//)
+
+        crawler.on_page_downloaded do |crawled_page|
+          if crawled_page.success? and crawled_page.body
+
+            profile = SocialLead.new(:user_id => 10, :social_network => "okcupid",
+                                     :username=> crawled_page.url.split("/profile/").last)
+            profile.save
+            body_text = crawled_page.body.force_encoding('UTF-8') || crawled_page.body
+
+            if body_text
+
+              # Phone Numbers
+              body_text.scan(/\(?([0-9]{3})\)?([ .-]?)([0-9]{3})\2([0-9]{4})/).each do |phone_number|
+                if phone_number
+                  phone_number = phone_number.to_s.scan(/\d/).join
+                  @numbers << [phone_number, crawled_page.url]
+                end
+              end
+
+              # Email Addresses
+              body_text.scan(/[\w\d]+[\w\d.-]@[\w\d.-]+\.\w{2,6}/).each do |address|
+                if address
+                  @addresses << [address.to_s.downcase, crawled_page.url]
+                end
+              end
+
+              # LinkedIn Profiles
+              [body_text.scan(/(?<=linkedin.com\/in\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.linkedin.com\/in\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.linkedin.com\/pub\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=linkedin.com\/pub\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=linkedin.com\/in\/)[a-z0-9_-]{3,16}/)].flatten.each do |linkedin|
+                if linkedin
+                  @profiles << ["http://linkedin.com/in/#{linkedin.downcase}", crawled_page.url]
+                end
+              end
+
+              # Google+ Profiles
+              body_text.scan(/(?<=plus.google.com\/)[a-z0-9_-]{3,16}/).each do |googleplus|
+                if googleplus
+                  @profiles << ["http://plus.google.com/+#{googleplus.downcase}", crawled_page.url]
+                end
+              end
+
+              # Instagram Profiles
+              body_text.scan(/(?<=instagram.com\/)[a-z0-9_-]{3,16}/).each do |instagram|
+                if instagram
+                  @profiles << ["http://instagram.com/#{instagram.downcase}", crawled_page.url]
+                end
+              end
+
+              # Pinterest Profiles
+              body_text.scan(/(?<=pinterest.com\/)[a-z0-9_-]{3,16}/).each do |pinterest|
+                if pinterest
+                  @profiles << ["http://pinterest.com/#{pinterest.downcase}", crawled_page.url]
+                end
+              end
+
+              # Github Profiles
+              body_text.scan(/github\.com(?:\/\#!)?\/(\w+)/i).each do |github|
+                if github
+                  @profiles << ["http://github.com/#{github.join.downcase}", crawled_page.url]
+                end
+              end
+
+              # Twitter Profiles
+              body_text.scan(/twitter\.com(?:\/\#!)?\/(\w+)/i).each do |twitter|
+                if twitter and !twitter.join.match(".php")
+                  @profiles << ["http://twitter.com/#{twitter.join.downcase}", crawled_page.url]
+                end
+              end
+
+              # Facebook Profiles
+              body_text.scan(/(?:https?:\/\/)?(?:www\.)?facebook\.com\/(?:(?:\w)*#!\/)?(?:pages\/)?(?:[\w\-]*\/)*([\w\-\.]*)/).each do |facebook|
+                if facebook and !facebook.to_s.match(".php")
+                  @profiles << ["http://facebook.com/#{facebook.join.downcase}", crawled_page.url]
+                end
+              end
+          end
+          end
+        end
+
+        crawler.on_crawl_end do
+          unless @addresses.empty? and @profiles.empty? and @numbers.empty?
+            @addresses.uniq! { |a| a.first }
+            @profiles.uniq! { |p| p.first }
+            @numbers.uniq! { |n| n.first }
+            LeadWorker.perform_async(@addresses, @numbers, @profiles, @current_user.id, 1, "okcupid.com")
+          end
+        end
+      end
+    end
+  end
+
+
+
+
+
+

+ 31 - 0
engine/app/workers/post_spider_worker.rb

@@ -0,0 +1,31 @@
+require 'json'
+class PostSpiderWorker
+  include Sidekiq::Worker
+  include Sidekiq::Benchmark::Worker
+
+  sidekiq_options :queue => 'default', :retry => false, :backtrace => true, expires_in: 1.hour
+
+  def perform(record_id, record_klass)
+    benchmark.meta_scraper_metric do
+     case record_klass
+      when "EmailLead"
+        record = EmailLead.find(record_id)
+      when "PhoneLead"
+        record = PhoneLead.find(record_id)
+      when "SocialLead"
+        record = SocialLead.find(record_id)
+      end
+
+    if record and record.source_url
+      meta = MetaInspector.new record.source_url
+      if meta
+        record.keywords =  meta.meta_tag['name']['keywords']
+        record.description = meta.description
+        record.image_url = meta.images.best
+        record.save
+      end
+      end
+    end
+    benchmark.finish
+   end
+end

+ 20 - 0
engine/app/workers/score_worker.rb

@@ -0,0 +1,20 @@
+class ScoreWorker
+  include Sidekiq::Worker
+  sidekiq_options   :queue => 'score', :retry => false, :backtrace => true, expires_in: 1.hour
+
+def perform(email)
+  {
+      twitter_followers_weight:   0.05,
+      angellist_followers_weight: 0.05,
+      klout_score_weight:         0.05,
+      company_twitter_followers_weight: 0.05,
+      company_alexa_rank_weight:  0.000005,
+      company_google_rank_weight: 0.05,
+      company_employees_weight:   0.5,
+      company_raised_weight:      0.0000005,
+      company_score:              10,
+      total_score:                1415
+  }
+end
+
+end

+ 283 - 0
engine/app/workers/spider_worker.rb

@@ -0,0 +1,283 @@
+require 'objspace'
+class SpiderWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker
+  include Sidekiq::Benchmark::Worker
+
+
+  sidekiq_options :queue => 'crawler', :retry => false, :backtrace => true, expires_in: 1.hour #, throttle: { threshold: 10, period: 1.minute, key: ->(user_id){ user_id } }
+
+
+  def perform(url, user_id, job_id)
+    benchmark.spider_metric do
+
+      @max_pages = 1000
+      total @max_pages
+      @profiles = []
+      @addresses = []
+      @numbers = []
+      @original_url = url
+      @pages = 0
+      @current_user = User.find(user_id)
+      @end_time = Time.now + 15.minutes
+
+      if @current_user
+        #if @current_user.websites.count > @current_user.max_targets
+        #  return "[ERROR]  Max reached for user #{user_id}"
+        #else
+        @current_user.active_engines.incr
+        @current_user.job_ids << job_id
+        #end
+      else
+        return "[ERROR] User #{user_id} does not exist"
+      end
+
+      url.gsub!(" ", "")
+      uri = URI.parse(url)
+      @original_domain = uri.host || url.rpartition("://")[2].rpartition("/")[0]
+      @website = Website.find_or_initialize_by(:domain => @original_domain, :user_id => user_id)
+      @website.url = url
+      @website.save
+      @logger = Logger.new(STDOUT)
+
+      @client = Elasticsearch::Client.new(url: ENV['ELASTICSEARCH_URL'], logger: @logger)
+      @client.transport.logger.level = Logger::WARN
+      if @current_user and @current_user.admin == true
+        @storage = Polipus::Storage::ElasticSearchStore.new(
+            @client,
+            refresh: true
+        )
+        @storage.include_query_string_in_uuid = true
+      else
+        @storage = nil
+      end
+
+
+
+      @options = {
+          :redis_options => {
+              :host => 'localhost',
+              :driver => 'hiredis',
+              :db => 11},
+          :depth_limit => 4,
+          :discard_page_bodies => false,
+          # HTTP read timeout in seconds
+          :read_timeout => 10,
+          # HTTP open connection timeout in seconds
+          :open_timeout => 10,
+          :obey_robots_txt => false,
+          :logger => @logger,
+          :skip_query_strings => false,
+          :user_agent => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9) AppleWebKit/537.71 (KHTML, like Gecko) Version/7.0 Safari/537.71",
+          :enable_signal_handler => false,
+          :workers => 4,
+          :redirect_limit => 4,
+          # :ttl => 900,
+          :storage => @storage
+      }
+
+
+
+
+      Polipus.crawler(job_id, url, @options) do |crawler|
+
+        crawler.skip_links_like(/\/versions\//)
+        crawler.skip_links_like(/\.pdf$/)
+        crawler.skip_links_like(/\.zip$/)
+        crawler.skip_links_like(/\.jpg$/)
+        crawler.skip_links_like(/\.png$/)
+        crawler.skip_links_like(/\.PDF$/)
+        crawler.skip_links_like(/\.JPG$/)
+        crawler.skip_links_like(/\.PNG$/)
+        crawler.skip_links_like(/\.GIF$/)
+        crawler.skip_links_like(/\.EXE$/)
+        crawler.skip_links_like(/\.gif$/)
+        crawler.skip_links_like(/\.exe$/)
+        crawler.skip_links_like(/\.mpg$/)
+        crawler.skip_links_like(/\.avi$/)
+        crawler.skip_links_like(/\.mp4$/)
+        crawler.skip_links_like(/\.mpeg$/)
+        crawler.skip_links_like(/\/images\//)
+
+        crawler.on_page_downloaded do |crawled_page|
+          @current_user.mileage.increment
+          @current_user.pages_crawled.add crawled_page.url
+
+          @pages += 1
+          at @pages, "#{crawled_page.url}"
+          if crawled_page.success?
+            # @current_user.bandwidth_used.incr(ObjectSpace.memsize_of(crawled_page.body))
+            if crawled_page.doc and crawled_page.doc.at('body')
+              body_text = crawled_page.doc.at('html').text
+            else
+              body_text = crawled_page.body.to_s.force_encoding('UTF-8') || crawled_page.body.to_s
+            end
+            if body_text
+
+
+              # Phone
+              body_text.scan(/\(?([0-9]{3})\)?([ .-]?)([0-9]{3})\2([0-9]{4})/).each do |phone_number|
+                if phone_number
+                  phone_number = phone_number.to_s.scan(/\d/).join
+                  @numbers << [phone_number, crawled_page.url]
+                  # @current_user.notifications.add ["Phone: #{phone_number}", @original_domain]
+                end
+              end
+
+              # Email
+              body_text.scan(/[\w\d]+[\w\d.-]@[\w\d.-]+\.\w{2,6}/).each do |address|
+                if address
+                  @addresses << [address.to_s.downcase, crawled_page.url]
+                  # @current_user.notifications.add ["Email: #{address.to_s.downcase}", @original_domain]
+                end
+              end
+
+              # Twitter
+              body_text.scan(/twitter\.com(?:\/\#!)?\/(\w+)/i).each do |twitter|
+                if twitter and !twitter.join.match(".php")
+                  @profiles << ["http://twitter.com/#{twitter.join.downcase}", crawled_page.url]
+                  # @current_user.notifications.add ["Twitter: #{twitter.join.downcase}", @original_domain]
+                end
+              end
+
+              # Facebook
+              body_text.scan(/(?:https?:\/\/)?(?:www\.)?facebook\.com\/(?:(?:\w)*#!\/)?(?:pages\/)?(?:[\w\-]*\/)*([\w\-\.]*)/).each do |facebook|
+                if facebook and !facebook.to_s.match(".php")
+                  @profiles << ["http://facebook.com/#{facebook.join.downcase}", crawled_page.url]
+                  # @current_user.notifications.add ["Facebook: #{facebook.join.downcase}", @original_domain]
+                end
+              end
+
+              # LinkedIn
+              [body_text.scan(/(?<=linkedin.com\/in\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.linkedin.com\/in\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.linkedin.com\/pub\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=linkedin.com\/pub\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=linkedin.com\/in\/)[a-z0-9_-]{3,16}/)].flatten.each do |linkedin|
+                if linkedin
+                  @profiles << ["http://linkedin.com/in/#{linkedin.downcase}", crawled_page.url]
+                  # @current_user.notifications.add ["LinkedIn: #{linkedin.downcase}", @original_domain]
+                end
+              end
+
+              # Google+
+              body_text.scan(/(?<=plus.google.com\/)[a-z0-9_-]{3,16}/).each do |googleplus|
+                if googleplus
+                  @profiles << ["http://plus.google.com/+#{googleplus.downcase}", crawled_page.url]
+                  #  @current_user.notifications.add ["Google+: #{googleplus.downcase}", @original_domain]
+                end
+              end
+
+              # Instagram
+              body_text.scan(/(?<=instagram.com\/)[a-z0-9_-]{3,16}/).each do |instagram|
+                if instagram
+                  @profiles << ["http://instagram.com/#{instagram.downcase}", crawled_page.url]
+                  # @current_user.notifications.add ["Instagram: #{instagram.downcase}", @original_domain]
+                end
+              end
+
+              # Pinterest
+              body_text.scan(/(?<=pinterest.com\/)[a-z0-9_-]{3,16}/).each do |pinterest|
+                if pinterest
+                  @profiles << ["http://pinterest.com/#{pinterest.downcase}", crawled_page.url]
+                  #  @current_user.notifications.add ["Pinterest: #{pinterest.downcase}", @original_domain]
+                end
+              end
+
+              # Github
+              [body_text.scan(/(?<=github.com\/user\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.github.com\/user\/)[a-z0-9_-]{3,16}/)].flatten.each do |username|
+                if username
+                  @profiles << ["http://github.com/#{username.downcase}", crawled_page.url]
+                  # @current_user.notifications.add ["GitHub: #{username.downcase}", @original_domain]
+                end
+              end
+
+
+              # Vimeo
+              body_text.scan(/vimeo\.com(?:\/\#!)?\/(\w+)/i).each do |vimeo|
+                if vimeo
+                  @profiles << ["http://vimeo.com/#{vimeo.join.downcase}", crawled_page.url]
+                  # @current_user.notifications.add ["Vimeo: #{vimeo.join.downcase}", @original_domain]
+                end
+              end
+
+
+              # # lastfm
+              # [body_text.scan(/(?<=lastfm.com\/user\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.lastfm.com\/user\/)[a-z0-9_-]{3,16}/)].flatten.each do |username|
+              #   if username
+              #     @profiles << ["http://lastfm.com/user/#{username.downcase}", crawled_page.url]
+              #     @current_user.notifications.add ["LastFM: #{username.downcase}", @original_domain]
+              #   end
+              # end
+
+              # Stumbleupon
+              #   body_text.scan(/stumbleupon\.com(?:\/\#!)?\/(\w+)/i).each do |stumbleupon|
+              #     if stumbleupon
+              #       @profiles << ["http://stumbleupon.com/#{stumbleupon.join.downcase}", crawled_page.url]
+              #       @current_user.notifications.add ["StumbleUpon: #{stumbleupon.join.downcase}", @original_domain]
+              #     end
+              #   end
+
+              # Flickr
+              #    body_text.scan(/flickr\.com(?:\/\#!)?\/(\w+)/i).each do |username|
+              #      if username
+              #        @profiles << ["http://flickr.com/#{username.join.downcase}", crawled_page.url]
+              #        @current_user.notifications.add ["Flicker: #{username.downcase}", @original_domain]
+              #      end
+              #    end
+
+              # Foursquare
+              #    [body_text.scan(/(?<=foursquare.com\/user\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.foursquare.com\/user\/)[a-z0-9_-]{3,16}/)].flatten.each do |username|
+              #      if username
+              #        @profiles << ["http://foursquare.com/user/#{username.downcase}", crawled_page.url]
+              #        @current_user.notifications.add ["Foursquare: #{username.downcase}", @original_domain]
+              #      end
+              #    end
+
+              # SoundCloud
+              #      body_text.scan(/soundcloud\.com(?:\/\#!)?\/(\w+)/i).each do |soundcloud|
+              #        if soundcloud
+              #          @profiles << ["http://soundcloud.com/#{soundcloud.join.downcase}", crawled_page.url]
+              #          @current_user.notifications.add ["SoundCloud: #{soundcloud.join.downcase}", @original_domain]
+              #        end
+              #      end
+
+              # Meetup
+              #       [body_text.scan(/(?<=meetup.com\/members\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.meetup.com\/members\/)[a-z0-9_-]{3,16}/)].flatten.each do |meetup|
+              #         if meetup
+              #           @profiles << ["http://meetup.com/members/#{meetup.downcase}", crawled_page.url]
+              #           @current_user.notifications.add ["Meetup: #{meetup.downcase}", @original_domain]
+              #         end
+              #       end
+
+              # Reddit
+              #   [body_text.scan(/(?<=reddit.com\/user\/)[a-z0-9_-]{3,16}/), body_text.scan(/(?<=www.reddit.com\/user\/)[a-z0-9_-]{3,16}/)].flatten.each do |reddit|
+              #     if reddit
+              #       @profiles << ["http://reddit.com/user/#{reddit.downcase}", crawled_page.url]
+              #       @current_user.notifications.add ["Reddit: #{reddit.downcase}", @original_domain]
+              #     end
+              #   end
+
+
+            end
+          end
+        end
+
+        crawler.on_crawl_end do
+          unless @addresses.empty? and @profiles.empty? and @numbers.empty?
+            @addresses.uniq! { |a| a.first }
+            @profiles.uniq! { |p| p.first }
+            @numbers.uniq! { |n| n.first }
+            LeadWorker.perform_async(@addresses, @numbers, @profiles, @current_user.id, @website.id, @original_domain)
+          end
+          @current_user.job_ids.delete job_id
+          @current_user.active_engines.decr
+        end
+      end
+    end
+
+    benchmark.finish
+  end
+end
+
+
+
+
+
+

+ 86 - 0
engine/app/workers/text_analysis_worker.rb

@@ -0,0 +1,86 @@
+require 'sad_panda'
+require 'odyssey'
+require 'engtagger'
+require 'whatlanguage'
+require 'json'
+
+class TextAnalysisWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker
+  include Sidekiq::Benchmark::Worker
+
+  sidekiq_options :queue => 'default', :retry => false, :backtrace => true, expires_in: 1.hour
+
+
+  @@wl = WhatLanguage.new(:all)
+  @@tgr = EngTagger.new
+
+  
+  def perform(record_id, klass)
+
+    benchmark.text_analysis_metric do
+
+      record = nil
+      if klass == "EmailLead"
+        record = EmailLead.find(record_id)
+      elsif klass == "PhoneLead"
+        record = PhoneLead.find(record_id)
+      elsif klass == "SocialLead"
+        record = SocialLead.find(record_id)
+      end
+
+
+    if record and record.page_text
+      
+        hash = {}
+        tagged = @@tgr.add_tags record.page_text
+        hash[:word_list] =  @@tgr.get_words record.page_text
+        hash[:nouns] = @@tgr.get_nouns(tagged)
+        hash[:proper_nouns] = @@tgr.get_proper_nouns(tagged)
+        hash[:past_tense_verbs] = @@tgr.get_past_tense_verbs(tagged)
+        hash[:adjectives] =  @@tgr.get_adjectives(tagged)
+        hash[:noun_phrases] = @@tgr.get_noun_phrases(tagged)
+        hash[:language] = @@wl.language record.page_text
+        hash[:languages_ranked] = @@wl.process_text record.page_text
+        hash[:profanity] = SadPanda.polarity record.page_text
+        hash[:emotion] = SadPanda.emotion record.page_text
+        hash[:reading_level] = Odyssey.coleman_liau record.page_text
+        names = text.scan(/([A-Z][a-z]+(?=\s[A-Z])(?:\s[A-Z][a-z]+)+)/)
+        hash[:names] = names.to_s
+
+        if names
+          begin
+          names.flatten!
+          names.uniq!
+          names.each do |name|
+            first_name = name.split(" ").first
+            last_name = name.split(" ").last
+            gender = Guess.gender(first_name.to_s.humanize)
+            ethnicity = $races[last_name.to_s.upcase]
+            if gender or ethnicity
+              person = Person.find_or_initialize_by(:first_name => first_name.humanize, :last_name => last_name.humanize)
+              unless person.gender or person.ethnicity
+                person.gender = gender.to_s
+                person.ethnicity = ethnicity.to_s
+                person.save
+              end
+            end
+          end
+          rescue
+            #ignore
+          end
+
+        end
+
+        record.update(:page_json => hash.to_json)
+
+    end
+    end
+    benchmark.finish
+
+  end
+end
+
+
+
+

+ 36 - 0
engine/app/workers/twitter_worker.rb

@@ -0,0 +1,36 @@
+class LinkedinWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  def perform(id)
+
+    record = SocialLead.find(id)
+    if record
+    profile = Linkedin::Profile.new(record.profile_url) # , { company_details: true, open_timeout: 30, proxy_ip: '127.0.0.1', proxy_port: '3128', username: 'user', password: 'pass' })
+      if profile
+        record.first_name = profile.first_name          # The first name of the contact
+        record.last_name = profile.last_name           # The last name of the contact
+        record.description = profile.summary      # The summary of the profile
+        record.location = profile.location            # The location of the contact
+        record.country = profile.country             # The country of the contact
+        record.image_url = profile.picture             # The profile picture link of profile
+        record.name = profile.name
+
+       # record.title = profile.title               # The full name of the profile
+
+
+       # record.industry = profile.industry            # The domain for which the contact belongs
+       # record.skills = profile.skills.to_s            # Array of skills of the profile
+       #  record.organization = profile.organizations.to_s     # Array organizations of the profile
+       # record.education = profile.education.to_s      # Array of hashes for education
+       # record.websites = profile.websites.to_s         # Array of websites
+       # record.interests = profile.groups.to_s              # Array of groups
+       # record.followers = profile.number_of_connections # The number of connections as a string
+        record.save
+
+      end
+    end
+
+
+  end
+end

+ 117 - 0
engine/app/workers/validation_worker.rb

@@ -0,0 +1,117 @@
+class ValidationWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker # Important!
+  include Sidekiq::Benchmark::Worker
+  sidekiq_options   :queue => 'validation', :retry => false, :backtrace => true, expires_in: 1.hour
+
+  def perform(email_address, user_id)
+    benchmark.extraction_metric do
+
+      email = EmailLead.find_or_initialize_by(:address => email_address, :user_id => user_id)
+
+    if email
+      email_domain = email.address.rpartition("@")[2]
+      school = Swot::school_name email.address
+
+      if school or email_domain.downcase.match(".edu")
+        email.organization = school || "Education"
+      end
+
+      # govt_domain = Gman.new email.address
+      #
+      # if govt_domain or email_domain.downcase.match(".gov")
+      #   email.organization = govt_domain.agency
+      #   email.organization ||= "Government"
+      # end
+
+    unless email.blank? or email.is_valid or email.address.match("gmail.com") or email.address.match("abuse") or email.address.match("yahoo.com") or email.address.match("hotmail.com") or email.address.match("msn.com") or email.address.match("aol.com") or email.address.split(".").last.match("ca")
+      checker = ["support", $public_hostname].join("@")
+      validator = EmailAuthentication::Base.new
+      result = validator.check(email.address, checker)
+        if result and result[0] and result.to_s.match("250")
+          email.smtp_reply = result[1].to_s.first(255)
+          email.is_valid = true
+        else
+          email.is_valid = false
+        end
+    end
+    email.save
+    end
+      benchmark.finish
+    end
+  end
+
+  def permutate(first_name, last_name, domain)
+
+    first_initial = first_name[0]
+    last_initial = last_name[0]
+
+    # Define each name permutation manually
+    name_permutations = <<PERMS
+{first_name}
+{last_name}
+{first_initial}
+{last_initial}
+{first_name}{last_name}
+{first_name}.{last_name}
+{first_initial}{last_name}
+{first_initial}.{last_name}
+{first_name}{last_initial}
+{first_name}.{last_initial}
+{first_initial}{last_initial}
+{first_initial}.{last_initial}
+{last_name}{first_name}
+{last_name}.{first_name}
+{last_name}{first_initial}
+{last_name}.{first_initial}
+{last_initial}{first_name}
+{last_initial}.{first_name}
+{last_initial}{first_initial}
+{last_initial}.{first_initial}
+{first_name}-{last_name}
+{first_initial}-{last_name}
+{first_name}-{last_initial}
+{first_initial}-{last_initial}
+{last_name}-{first_name}
+{last_name}-{first_initial}
+{last_initial}-{first_name}
+{last_initial}-{first_initial}
+{first_name}_{last_name}
+{first_initial}_{last_name}
+{first_name}_{last_initial}
+{first_initial}_{last_initial}
+{last_name}_{first_name}
+{last_name}_{first_initial}
+{last_initial}_{first_name}
+{last_initial}_{first_initial}
+PERMS
+
+    # substitutions to get all permutations to an Array
+    name_permutations = name_permutations.gsub('{first_name}', first_name)
+                            .gsub('{last_name}', last_name)
+                            .gsub('{first_initial}', first_initial)
+                            .gsub('{last_initial}', last_initial)
+                            .split($/)
+
+    # accept domain arg to be a string or an array
+    # %40 => @
+    if domain.is_a? String
+      domain = ['%40'].product domain.split
+    elsif domain.is_a? Array
+      domain = ['%40'].product domain
+    else
+      raise ArgumentError, 'Domain was neither a String or Array'
+    end
+
+    name_and_domains = name_permutations.product domain
+
+    # combine names and domains
+    # return permuations
+    permutations = name_and_domains.map {|email| email.join }
+  end
+
+
+
+end
+
+

+ 49 - 0
engine/app/workers/whois_worker.rb

@@ -0,0 +1,49 @@
+class WhoisWorker
+  include Sidekiq::Worker
+  include Sidekiq::Status::Worker # Important!
+  include Sidekiq::Benchmark::Worker
+  sidekiq_options :retry => false, :backtrace => true,  expires_in: 1.hour
+
+  def perform(domain)
+    benchmark.whois_metric do
+    website = Website.find_by(:domain => domain)
+    if website and Rails.env.production?
+      whois_data = Whois.whois(website.domain)
+      if whois_data and whois_data.contacts
+        contacts = whois_data.contacts
+        unless contacts.blank?
+          contacts.each do |contact|
+            if contact.organization
+            company = Company.find_or_initialize_by(:company_name => contact.organization, :domain => website.domain)
+            unless company.id
+              company.address  = contact.address
+              company.city = contact.city
+              company.state = contact.state
+              company.country = contact.country
+              if company.save
+                website.company_id = company.id
+              end
+            end
+            if contact.name
+              name_array = contact.name.split(" ")
+              person = Person.find_or_initialize_by(:first_name => name_array.first, :last_name => name_array.last, :domain => website.domain)
+              unless person.id
+                person.company_id = company.id
+                person.save
+              end
+            end
+         end
+        end
+        end
+        website.whois = whois_data.to_s
+        website.save
+      end
+    end
+  end
+  benchmark.finish
+
+  end
+
+end
+
+

+ 34 - 0
engine/app/workers/youtube_worker.rb

@@ -0,0 +1,34 @@
+class LinkedinWorker
+  include Sidekiq::Worker
+  sidekiq_options :retry => false, :backtrace => true
+
+  def perform(id)
+
+    lead = SocialLead.find(id)
+    if lead
+    profile = Linkedin::Profile.new(lead.profile_url) # , { company_details: true, open_timeout: 30, proxy_ip: '127.0.0.1', proxy_port: '3128', username: 'user', password: 'pass' })
+      if profile
+        record = Person.find_or_initialize_by(:first_name => profile.first_name, :last_name => profile.last_name)
+        record.first_name = profile.first_name          # The first name of the contact
+        record.last_name = profile.last_name           # The last name of the contact
+        record.description = profile.summary      # The summary of the profile
+        record.location = profile.location            # The location of the contact
+        record.country = profile.country             # The country of the contact
+        record.image_url = profile.picture             # The profile picture link of profile
+        record.name = profile.name
+       # record.title = profile.title               # The full name of the profile
+       # record.industry = profile.industry            # The domain for which the contact belongs
+       # record.skills = profile.skills.to_s            # Array of skills of the profile
+       #  record.organization = profile.organizations.to_s     # Array organizations of the profile
+       # record.education = profile.education.to_s      # Array of hashes for education
+       # record.websites = profile.websites.to_s         # Array of websites
+       # record.interests = profile.groups.to_s              # Array of groups
+       # record.followers = profile.number_of_connections # The number of connections as a string
+        record.save
+
+      end
+    end
+
+
+  end
+end

+ 3 - 0
engine/bin/bundle

@@ -0,0 +1,3 @@
+#!/usr/bin/env ruby
+# Binstub that runs Bundler's `bundle` executable for this application.
+# An already-set BUNDLE_GEMFILE is respected; otherwise the Gemfile in the
+# directory above bin/ is used.
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
+load Gem.bin_path('bundler', 'bundle')

+ 9 - 0
engine/bin/rails

@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+# Binstub for the `rails` command.
+# Load the bin/spring binstub first. A LoadError that mentions 'spring' is
+# tolerated so the command still works without it; any other LoadError is
+# re-raised.
+begin
+  load File.expand_path('../spring', __FILE__)
+rescue LoadError => e
+  raise unless e.message.include?('spring')
+end
+# Absolute path of config/application, defined before rails/commands loads.
+APP_PATH = File.expand_path('../../config/application', __FILE__)
+require_relative '../config/boot'
+require 'rails/commands'

+ 9 - 0
engine/bin/rake

@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+# Binstub for the `rake` command.
+# Load the bin/spring binstub first. A LoadError that mentions 'spring' is
+# tolerated so the command still works without it; any other LoadError is
+# re-raised.
+begin
+  load File.expand_path('../spring', __FILE__)
+rescue LoadError => e
+  raise unless e.message.include?('spring')
+end
+# Set up Bundler via config/boot, then hand control to Rake.
+require_relative '../config/boot'
+require 'rake'
+Rake.application.run

+ 8 - 0
engine/bin/rspec

@@ -0,0 +1,8 @@
+#!/usr/bin/env ruby
+# Binstub for the `rspec` command.
+# Load the bin/spring binstub first. A LoadError that mentions 'spring' is
+# tolerated so the command still works without it; any other LoadError is
+# re-raised.
+begin
+  load File.expand_path('../spring', __FILE__)
+rescue LoadError => e
+  raise unless e.message.include?('spring')
+end
+# Activate the bundled gems, then run the rspec executable from rspec-core.
+require 'bundler/setup'
+load Gem.bin_path('rspec-core', 'rspec')

+ 29 - 0
engine/bin/setup

@@ -0,0 +1,29 @@
+#!/usr/bin/env ruby
+require 'pathname'
+
+# path to your application root.
+APP_ROOT = Pathname.new File.expand_path('../../',  __FILE__)
+
+Dir.chdir APP_ROOT do
+  # This script is a starting point to setup your application.
+  # Add necessary setup steps to this file:
+
+  puts "== Installing dependencies =="
+  system "gem install bundler --conservative"
+  system "bundle check || bundle install"
+
+  # puts "\n== Copying sample files =="
+  # unless File.exist?("config/database.yml")
+  #   system "cp config/database.yml.sample config/database.yml"
+  # end
+
+  puts "\n== Preparing database =="
+  system "bin/rake db:setup"
+
+  puts "\n== Removing old logs and tempfiles =="
+  system "rm -f log/*"
+  system "rm -rf tmp/cache"
+
+  puts "\n== Restarting application server =="
+  system "touch tmp/restart.txt"
+end

+ 15 - 0
engine/bin/spring

@@ -0,0 +1,15 @@
+#!/usr/bin/env ruby
+
+# This file loads spring without using Bundler, in order to be fast.
+# It gets overwritten when you run the `spring binstub` command.
+
+unless defined?(Spring)
+  require 'rubygems'
+  require 'bundler'
+
+  # Read the spring version pinned in the GEM section of the lockfile; when
+  # spring is not listed there, nothing is loaded.
+  if (match = Bundler.default_lockfile.read.match(/^GEM$.*?^    (?:  )*spring \((.*?)\)$.*?^$/m))
+    # Search Bundler's install path ahead of the default gem paths, then
+    # activate exactly the locked spring version.
+    Gem.paths = { 'GEM_PATH' => [Bundler.bundle_path.to_s, *Gem.path].uniq.join(Gem.path_separator) }
+    gem 'spring', match[1]
+    require 'spring/binstub'
+  end
+end

+ 26 - 0
engine/config.ru

@@ -0,0 +1,26 @@
+# This file is used by Rack-based servers to start the application.
+
+# Boot the Rails environment before any middleware is declared.
+require ::File.expand_path('../config/environment', __FILE__)
+# require 'sidekiq'
+# require 'sidekiq/web'
+# require 'sidekiq/pro/web'
+
+# Insert the Sidekiq batch-status Rack middleware in front of the Rails app.
+require 'sidekiq/rack/batch_status'
+use Sidekiq::Rack::BatchStatus
+run Rails.application
+# run Sidekiq::Web
+
+
+
+
+
+#####
+
+
+# # config.ru
+# require 'sidekiq/web'
+# require 'sidekiq-statistic'
+#
+# use Rack::Session::Cookie, secret: 'some unique secret string here'
+# Sidekiq::Web.instance_eval { @middleware.reverse! } # Last added, First Run
+# run Sidekiq::Web

+ 7 - 0
engine/config/application.rb

@@ -0,0 +1,7 @@
+# Rails application definition.
+# Load config/boot first, then all of Rails.
+require File.expand_path('../boot', __FILE__)
+require 'rails/all'
+# Require the gems from the Gemfile groups that apply to the current environment.
+Bundler.require(*Rails.groups)
+module ContactRocket
+  # No application-level settings are defined here.
+  class Application < Rails::Application
+  end
+end

+ 3 - 0
engine/config/boot.rb

@@ -0,0 +1,3 @@
+# Respect an already-set BUNDLE_GEMFILE; otherwise use the Gemfile in the
+# directory above config/.
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
+
+require 'bundler/setup' # Set up gems listed in the Gemfile.

+ 10 - 0
engine/config/containers/Dockerfile-base

@@ -0,0 +1,10 @@
+# Base image: installs the production gem bundle into /app and copies the
+# application source on top of it.
+FROM phusion/passenger-ruby22
+MAINTAINER Peter Alcock
+ENV HOME /root
+# Default command; presumably the base image's init process - confirm
+# against the phusion/passenger image documentation.
+CMD ["/sbin/my_init"]
+RUN mkdir -p /app
+WORKDIR /app
+# Copy only the Gemfiles before bundling, so the install layer is not
+# invalidated by changes to the rest of the source tree.
+COPY Gemfile.lock Gemfile.lock
+COPY Gemfile Gemfile
+RUN bundle install --without test development
+COPY . .

+ 26 - 0
engine/config/containers/Dockerfile-nginx

@@ -0,0 +1,26 @@
+# Nginx front-end image: bakes the static assets from public/ and a rendered
+# copy of config/containers/nginx.conf into the image.
+
+# build from the official Nginx image
+FROM nginx
+
+# install essential Linux packages
+RUN apt-get update -qq && apt-get -y install apache2-utils
+
+# establish where Nginx should look for files
+ENV RAILS_ROOT /app
+WORKDIR /app
+
+# create log directory
+RUN mkdir -p log
+
+# copy over static assets
+COPY public public/
+
+# copy our Nginx config template
+COPY config/containers/nginx.conf /tmp/docker_example.nginx
+
+# substitute variable references in the Nginx config template for real values from the environment
+# put the final config in its place
+# (only $RAILS_ROOT is passed to envsubst, so Nginx's own $variables in the template are left untouched)
+RUN envsubst '$RAILS_ROOT' < /tmp/docker_example.nginx > /etc/nginx/conf.d/default.conf
+
+# Use the "exec" form of CMD so Nginx shuts down gracefully on SIGTERM (i.e. `docker stop`)
+CMD [ "nginx", "-g", "daemon off;" ]
+EXPOSE 80

+ 12 - 0
engine/config/containers/Dockerfile-web

@@ -0,0 +1,12 @@
+# Worker image: installs system packages and the gem bundle, then starts
+# Sidekiq through config/containers/app_cmd.sh.
+FROM ruby:2.3.1
+# System packages installed before bundling. Each package is listed once, and
+# the apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update -qq && apt-get install -y \
+      build-essential make cmake autoconf automake bison \
+      libcurl3 libcurl4-openssl-dev zlib1g-dev libssl-dev libyaml-dev libc6-dev \
+      libxml2-dev libxslt1-dev libicu-dev libkrb5-dev libpq-dev \
+      imagemagick libmagickwand-dev libqt4-dev libqtwebkit-dev \
+    && rm -rf /var/lib/apt/lists/*
+ENV RAILS_ROOT /contactrocket
+ENV RAILS_ENV production
+RUN mkdir -p $RAILS_ROOT
+# Follow RAILS_ROOT instead of repeating the literal path.
+WORKDIR $RAILS_ROOT
+# Copy only the Gemfiles before bundling, so the install layer is not
+# invalidated by changes to the rest of the source tree.
+COPY Gemfile Gemfile
+COPY Gemfile.lock Gemfile.lock
+RUN bundle install
+COPY . .
+CMD ["config/containers/app_cmd.sh"]
+

+ 2 - 0
engine/config/containers/app_cmd.sh

@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+# Container entry point: replaces the shell with Sidekiq, configured from
+# $RAILS_ROOT/config/sidekiq.yml. Fails fast when RAILS_ROOT is unset, and
+# quotes the path so a value containing spaces is passed as one argument.
+exec bundle exec sidekiq start -C "${RAILS_ROOT:?RAILS_ROOT must be set}/config/sidekiq.yml"

+ 43 - 0
engine/config/containers/nginx-crm.conf

@@ -0,0 +1,43 @@
+
+# Backend application server: host "crm", port 3001.
+upstream unicorn {
+  server crm:3001;
+}
+
+# NOTE(review): $RAILS_ROOT is not an Nginx variable; presumably it is
+# substituted before this file is installed - confirm how this file is rendered.
+server {
+  server_name localhost;
+  root   $RAILS_ROOT/public;
+  index  index.html;
+  access_log $RAILS_ROOT/log/nginx.access.log;
+  error_log $RAILS_ROOT/log/nginx.error.log;
+
+  # Refuse any path containing "/." (dotfiles and dot-directories).
+  location ~ /\. {
+    deny all;
+  }
+
+  # Refuse requests for Ruby source and log files.
+  location ~* ^.+\.(rb|log)$ {
+    deny all;
+  }
+
+  # Static asset directories: serve the file from disk when it exists,
+  # otherwise fall through to the application; cached with maximum expiry
+  # and not written to the access log.
+  location ~ ^/(assets|images|javascripts|stylesheets|swfs|system)/ {
+    try_files $uri @rails;
+    access_log off;
+    gzip_static on; # to serve pre-gzipped version
+    expires max;
+    add_header Cache-Control public;
+    add_header Last-Modified "";
+    add_header ETag "";
+    break;
+  }
+
+  # Everything else: a matching file on disk first, then the application.
+  location / {
+    try_files $uri @rails;
+  }
+
+  # Proxy to the upstream, passing the client address and the original Host header.
+  location @rails {
+    proxy_set_header  X-Real-IP  $remote_addr;
+    proxy_set_header  X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header Host $http_host;
+    proxy_redirect off;
+    proxy_pass http://unicorn;
+  }
+}

+ 44 - 0
engine/config/containers/nginx.conf

@@ -0,0 +1,44 @@
+
+# Backend that receives every request nginx does not answer from disk.
+upstream unicorn {
+  server app:3000;
+}
+
+server {
+  server_name localhost;
+  # NOTE(review): $RAILS_ROOT is not an nginx variable; presumably it is
+  # substituted before nginx loads this file (e.g. via envsubst) — confirm.
+  root   $RAILS_ROOT/public;
+  index  index.html;
+
+  access_log $RAILS_ROOT/log/nginx.access.log;
+  error_log $RAILS_ROOT/log/nginx.error.log;
+
+  # Refuse any path containing a dot-prefixed segment (e.g. /.git).
+  location ~ /\. {
+    deny all;
+  }
+
+  # Refuse requests for Ruby sources and log files (case-insensitive match).
+  location ~* ^.+\.(rb|log)$ {
+    deny all;
+  }
+
+  # Static asset prefixes: serve the file from disk when it exists, otherwise
+  # hand the request to @rails. Responses get a far-future expiry and the
+  # Last-Modified / ETag headers are added with empty values.
+  location ~ ^/(assets|images|javascripts|stylesheets|swfs|system)/ {
+    try_files $uri @rails;
+    access_log off;
+    gzip_static on; # to serve pre-gzipped version
+    expires max;
+    add_header Cache-Control public;
+    add_header Last-Modified "";
+    add_header ETag "";
+    break;
+  }
+
+  # Everything else: a file under root if present, otherwise @rails.
+  location / {
+    try_files $uri @rails;
+  }
+
+  # Named location that proxies to the upstream, forwarding the client address
+  # and the original Host header.
+  location @rails {
+    proxy_set_header  X-Real-IP  $remote_addr;
+    proxy_set_header  X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header Host $http_host;
+    proxy_redirect off;
+    proxy_pass http://unicorn;
+  }
+}

+ 61 - 0
engine/config/containers/unicorn.rb

@@ -0,0 +1,61 @@
+# Unicorn server settings: paths, listener, logging and worker count.
+#
+# Where our application lives. $RAILS_ROOT is defined in our Dockerfile.
+app_path = ENV['RAILS_ROOT']
+# NOTE(review): crm_path is assigned but never used in this file — confirm
+# whether it is still needed.
+crm_path = ENV['CRM_ROOT']
+
+# Set the server's working directory
+working_directory app_path
+
+# Define where Unicorn should write its PID file
+pid "#{app_path}/tmp/pids/unicorn.pid"
+
+# Bind Unicorn to the container's default route, at port 3000
+listen "0.0.0.0:3000"
+
+# Define where Unicorn should write its log files
+stderr_path "#{app_path}/log/unicorn.stderr.log"
+stdout_path "#{app_path}/log/unicorn.stdout.log"
+
+# Define the number of workers Unicorn should spin up.
+# A new Rails app just needs one. You would scale this
+# higher in the future once your app starts getting traffic.
+# See https://unicorn.bogomips.org/TUNING.html
+worker_processes 1
+
+# Make sure we use the correct Gemfile on restarts
+before_exec do |server|
+  ENV['BUNDLE_GEMFILE'] = "#{app_path}/Gemfile"
+end
+
+# Speeds up your workers.
+# See https://unicorn.bogomips.org/TUNING.html
+# The app is loaded in the master before forking, which is why the
+# before_fork / after_fork hooks below manage the ActiveRecord connection.
+preload_app true
+
+#
+# Below we define how our workers should be spun up.
+# See https://unicorn.bogomips.org/Unicorn/Configurator.html
+#
+
+before_fork do |server, worker|
+  # the following is highly recomended for Rails + "preload_app true"
+  # as there's no need for the master process to hold a connection
+  if defined?(ActiveRecord::Base)
+    ActiveRecord::Base.connection.disconnect!
+  end
+
+  # Before forking, kill the master process that belongs to the .oldbin PID.
+  # This enables 0 downtime deploys.
+  old_pid = "#{server.config[:pid]}.oldbin"
+  if File.exists?(old_pid) && server.pid != old_pid
+    begin
+      Process.kill("QUIT", File.read(old_pid).to_i)
+    rescue Errno::ENOENT, Errno::ESRCH
+      # someone else did our job for us
+    end
+  end
+end
+
+after_fork do |server, worker|
+  if defined?(ActiveRecord::Base)
+    ActiveRecord::Base.establish_connection
+  end
+end

+ 29 - 0
engine/config/database.yml

@@ -0,0 +1,29 @@
+# Connection settings shared by every environment unless overridden below.
+default: &default
+  adapter: postgresql
+  encoding: unicode
+  port: 5432
+  username: rocketeer
+  # No password is configured here.
+  password: null
+  database: contactrocket
+  schema_search_path: public,shared_extensions
+  pool: 10
+
+
+# Development uses the shared settings against a local server with its own
+# credentials; only the keys that differ are listed.
+development:
+  <<: *default
+  host: localhost
+  username: postgres
+  password: postgres
+
+
+test:
+  <<: *default
+
+
+production:
+  <<: *default

+ 31 - 0
engine/config/deploy.rb

@@ -0,0 +1,31 @@
+set :application, 'contactrocket'
+set :stage, 'production'
+set :scm, :git
+set :format, :pretty
+set :log_level, :debug
+set :pty, false
+set :keep_releases, 2
+set :sidekiq_config,  'config/sidekiq.yml'
+set :sidekiq_processes, 2
+set :sidekiq_concurrency, 10
+set :sidekiq_default_hooks , true
+set :sidekiq_timeout, 60
+set :sidekiq_monit_conf_dir , '/etc/monit/conf.d'
+set :sidekiq_monit_use_sudo , true
+set :sidekiq_monit_default_hooks , true
+#set :sidekiq_pid , File.join(shared_path, 'tmp', 'pids', 'sidekiq.pid')
+#set :sidekiq_log , File.join(shared_path, 'log', 'sidekiq.log')
+#set :sidekiq_options , nil
+#set :sidekiq_require , nil
+#set :sidekiq_tag , nil
+#set :sidekiq_config , "config/sidekiq.yml" # if you have a config/sidekiq.yml, do not forget to set this.
+#set :sidekiq_queue , nil
+#set :monit_bin , '/usr/bin/monit'
+#set :sidekiq_processes , 4
+#set :sidekiq_options_per_process , nil
+#set :sidekiq_concurrency , 10
+#set :sidekiq_monit_templates_path , 'config/deploy/templates'
+#set :sidekiq_service_name , "sidekiq_#{fetch(set :application)}_#{fetch(set :sidekiq_env)}"
+#set :sidekiq_cmd , "#{fetch(set :bundle_cmd, "bundle")} exec sidekiq" # Only for capistrano2.5
+#set :sidekiqctl_cmd , "#{fetch(set :bundle_cmd, "bundle")} exec sidekiqctl" # Only for capistrano2.5
+#set :sidekiq_user , nil #user to run sidekiq as

+ 75 - 0
engine/config/deploy/production.rb

@@ -0,0 +1,75 @@
+
+set :stage, "production"
+role :app, %w{ubuntu@your-server.net}
+set :deploy_to, '/srv/contactrocket/engine'
+server 'your-server.net',
+       user: 'ubuntu',
+       roles: %w{app},
+       ssh_options: {
+           user: 'ubuntu',
+           keys: %w(/Users/machine/.ssh/rockbox.pem),
+           forward_agent: true,
+           auth_methods: %w(publickey)
+       }
+
+set :linked_files, %w{config/database.yml config/sidekiq.yml}
+set :linked_dirs, %w{tmp/cache vendor/bundle log tmp/pids}
+
+
+set :pty,             true
+set :use_sudo,        true
+set :scm,           :git
+set :branch,        :master
+set :format,        :pretty
+set :log_level,     :debug
+set :keep_releases, 2
+set :local_repository, "file://."
+set :deploy_via, :copy
+# cache only seems to work if use scm
+set :copy_cache, true
+set :copy_via, :scp
+set :copy_exclude, [".zeus*", ".bundle", ".git", "tmp/*", "doc", "log/*", "fixtures/*"]
+
+
+task :deploy_from_local_repo do
+
+  run_locally do
+    execute "tar -zcvf /tmp/engine_repo.tgz .git"
+  end
+  set :repo_url,  "file:///tmp/.git"
+
+  on roles(:app) do
+    upload! '/tmp/engine_repo.tgz', '/tmp/engine_repo.tgz'
+    execute 'tar -zxvf /tmp/engine_repo.tgz -C /tmp'
+  end
+end
+
+
+task :remove_repo do
+  on roles(:app) do
+    execute "rm -rf /tmp/*.git"
+  end
+end
+
+
+# Deploy-flow additions: a revision sanity check and the hooks that wrap the
+# deploy with the local-repo upload and cleanup tasks.
+namespace :deploy do
+  desc "Make sure local git is in sync with remote."
+  # Compares HEAD with origin/master and stops the run when they differ.
+  # NOTE(review): the backtick git commands run in the process executing this
+  # task, not over the SSH connection opened by `on roles(:app)` — confirm
+  # that is intended.
+  # NOTE(review): no hook in this file invokes check_revision.
+  task :check_revision do
+    on roles(:app) do
+      unless `git rev-parse HEAD` == `git rev-parse origin/master`
+        puts "WARNING: HEAD is not the same as origin/master"
+        puts "Run `git push` to sync changes."
+        exit
+      end
+    end
+  end
+
+
+  # Upload the local repository before deploying, run cleanup and restart
+  # after :finishing, and remove the uploaded repository once deploy is done.
+  before :deploy, :deploy_from_local_repo
+  after  :finishing,    :cleanup
+  after  :finishing,    :restart
+  after :deploy, :remove_repo
+
+
+end
+

+ 5 - 0
engine/config/environment.rb

@@ -0,0 +1,5 @@
+# Pull in config/application.rb, which sits next to this file.
+require File.expand_path('application', File.dirname(__FILE__))
+
+# Boot the application.
+Rails.application.initialize!

+ 21 - 0
engine/config/environments/development.rb

@@ -0,0 +1,21 @@
+Rails.application.configure do
+    ENV['API_HOST'] = "localhost"
+    ENV['CRM_HOST'] = "localhost:3001"
+    ENV['APP_HOST'] = "localhost:3000"
+    ENV['ELASTICSEARCH_URL'] = "http://localhost:9200/"
+
+        ENV['REDIS_HOST'] = "localhost"
+    # Code is not reloaded between requests.
+    config.cache_classes = false
+
+    config.eager_load = false
+
+    config.log_level = :info
+
+    # Use default logging formatter so that PID and timestamp are not suppressed.
+    config.log_formatter = ::Logger::Formatter.new
+    config.action_mailer.default_url_options = { :host => "localhost:3000" }
+    config.action_mailer.delivery_method = :letter_opener
+    config.action_mailer.raise_delivery_errors = true
+    config.action_mailer.perform_deliveries = true
+end

+ 29 - 0
engine/config/environments/production.rb

@@ -0,0 +1,29 @@
+Rails.application.configure do
+
+
+  # Code is not reloaded between requests.
+  config.cache_classes = true
+
+  config.eager_load = true
+
+  config.log_level = :warn
+
+  # Use default logging formatter so that PID and timestamp are not suppressed.
+  config.log_formatter = ::Logger::Formatter.new
+  config.action_mailer.smtp_settings = {
+      address: "email-smtp.us-east-1.amazonaws.com",
+      port: 587,
+      domain: "your-server.net",
+      authentication: "plain",
+      enable_starttls_auto: true,
+      user_name: "",
+      password: ""
+  }
+
+  config.action_mailer.default_url_options = { :host => "your-server.net" }
+  config.action_mailer.delivery_method = :sendmail
+  config.action_mailer.perform_deliveries = true
+  config.action_mailer.raise_delivery_errors = false
+
+end
+

+ 23 - 0
engine/config/environments/test.rb

@@ -0,0 +1,23 @@
+Rails.application.configure do
+  ENV['API_HOST'] = "localhost"
+  ENV['CRM_HOST'] = "localhost:3001"
+  ENV['APP_HOST'] = "localhost:3000"
+  ENV['ELASTICSEARCH_URL'] = "http://localhost:9200/"
+
+
+    ENV['REDIS_HOST'] = "localhost"
+
+      config.cache_classes = false
+
+  config.eager_load = false
+
+  config.log_level = :warn
+
+  # Use default logging formatter so that PID and timestamp are not suppressed.
+  config.log_formatter = ::Logger::Formatter.new
+  config.action_mailer.default_url_options = { :host => "localhost:3000" }
+  config.action_mailer.delivery_method = :letter_opener
+  config.action_mailer.raise_delivery_errors = true
+  config.action_mailer.perform_deliveries = true
+end
+

+ 1 - 0
engine/config/initializers/active_job.rb

@@ -0,0 +1 @@
+ActiveJob::Base.queue_adapter = :sidekiq

+ 10 - 0
engine/config/initializers/analyzers.rb

@@ -0,0 +1,10 @@
+# Loads the analysis libraries and sets their process-wide defaults.
+require 'phonelib'
+require 'stopwords'
+# require 'sentimental'
+require 'odyssey'
+require 'sad_panda'
+# Default country for Phonelib is the US.
+Phonelib.default_country = "US"
+# $sentimental = Sentimental.new
+# Shared English ("en") Snowball stop-word filter, stored in a global.
+$stopwords_filter = Stopwords::Snowball::Filter.new "en"
+# ActiveMedian.create_function
+# $total_company_count = Company.count

+ 2 - 0
engine/config/initializers/docker.rb

@@ -0,0 +1,2 @@
+# Set the Docker client's URL from the DOCKER_URL environment variable.
+# NOTE(review): ENV['DOCKER_URL'] is nil when the variable is unset — confirm
+# the client copes with a nil URL.
+require 'docker'
+Docker.url = ENV['DOCKER_URL']

+ 6 - 0
engine/config/initializers/email_validation.rb

@@ -0,0 +1,6 @@
+# Sets the global $public_hostname, currently always 'localhost'.
+# The disabled branch below would instead fetch the value from
+# http://169.254.169.254/latest/meta-data/public-hostname in production.
+# if Rails.env.production?
+#   c = Curl::Easy.perform('http://169.254.169.254/latest/meta-data/public-hostname')
+#   $public_hostname = c.body_str
+# else
+$public_hostname = 'localhost'
+# end

+ 6 - 0
engine/config/initializers/ethnicity.rb

@@ -0,0 +1,6 @@
+require 'csv'
+$races = {}
+csv = CSV.open('vendor/data/ethnic_us_cencus_data.csv', 'r')
+csv.each do |row|
+  $races[row[0]] = {"white"=>row[5], "black" => row[6], "asian_or_pacific_islander" => row[7], "american_indian_or_alaska_native" => row[8], "multiple" => row[9], "hispanic" => row[10], "rank" => row[1]}
+end

+ 3 - 0
engine/config/initializers/polipus.rb

@@ -0,0 +1,3 @@
+# Load Polipus and register its Cleaner plugin with the `reset: true` option.
+require 'polipus'
+require 'polipus/plugins/cleaner'
+Polipus::Plugin.register Polipus::Plugin::Cleaner, reset: true

+ 2 - 0
engine/config/initializers/redis.rb

@@ -0,0 +1,2 @@
+# Redis::Objects uses database 1 on the host named by REDIS_HOST.
+Redis::Objects.redis = Redis.new(host: ENV['REDIS_HOST'], db: 1)
+

+ 7 - 0
engine/config/initializers/searchkick.rb

@@ -0,0 +1,7 @@
+
+require "faraday_middleware/aws_signers_v4"
+Searchkick.client =
+    Elasticsearch::Client.new(
+        url: ENV["ELASTICSEARCH_URL"],
+        transport_options: {request: {timeout: 10}}
+    )

+ 37 - 0
engine/config/initializers/sidekiq.rb

@@ -0,0 +1,37 @@
+require 'sidekiq'
+require 'sidekiq-status'
+require 'sidekiq/pro/expiry'
+
+unless Rails.env.production?
+  require 'sidekiq-benchmark/testing'
+end
+
+sidekiq_config = { host: ENV['REDIS_HOST'], network_timeout: 60 }
+Sidekiq::Logging.logger.level = Logger::INFO
+
+Sidekiq.configure_server do |config|
+  config.redis = sidekiq_config
+end
+
+Sidekiq.configure_client do |config|
+  config.redis = sidekiq_config
+end
+
+Sidekiq.configure_client do |config|
+  config.client_middleware do |chain|
+    chain.add Sidekiq::Hierarchy::Client::Middleware
+    chain.add Sidekiq::Status::ClientMiddleware, expiration: 30.minutes # default
+  end
+end
+
+Sidekiq.configure_server do |config|
+  config.server_middleware do |chain|
+ #   chain.add Sidekiq::Statsd::ServerMiddleware, env: "production", prefix: "worker", host: "localhost", port: 8125
+    chain.add Sidekiq::Hierarchy::Server::Middleware
+    chain.add Sidekiq::Status::ServerMiddleware, expiration: 30.minutes # default
+  end
+  config.client_middleware do |chain|
+    chain.add Sidekiq::Hierarchy::Client::Middleware
+    chain.add Sidekiq::Status::ClientMiddleware, expiration: 30.minutes # default
+  end
+end

+ 2 - 0
engine/config/initializers/tika.rb

@@ -0,0 +1,2 @@
+
+#Tika::App.path = "#{Rails.root}/vendor/tika/tika-app-1.14.jar"

+ 35 - 0
engine/config/initializers/vars.rb

@@ -0,0 +1,35 @@
+$social_network_colors = {}
+$social_network_colors[:facebook] = "#3b5998"
+$social_network_colors[:twitter] = "#55acee"
+$social_network_colors[:okcupid] = "#3b5998"
+$social_network_colors[:instagram] = "#e95950"
+$social_network_colors[:pinterest] = "#cb2027"
+$social_network_colors[:linkedin] = "#007bb5"
+$social_network_colors[:google] = "#dd4b39"
+$social_network_colors[:tumblr] = "#32506d"
+$social_network_colors[:vk] = "#45668e"
+$social_network_colors[:vimeo] = "#aad450"
+$social_network_colors[:foursquare] = "#0072b1"
+$social_network_colors[:dribbble] = "#000000"
+$social_network_colors[:stumbleupon] = "#000000"
+$social_network_colors[:wordpress] = "#000000"
+$social_network_colors[:vine] = "#00bf8f"
+$social_network_colors[:snapchat] = "#fffc00"
+$social_network_colors[:youtube] = "#bb0000"
+$social_network_colors[:flickr] = "#ff0084"
+$social_network_colors[:github] = "#000000"
+$social_network_colors[:yelp] = "#c41200"
+$social_network_colors[:soundcloud] = "#ff7700"
+$social_network_colors[:lastfm] = "#c3000d"
+$social_network_colors[:sourceforge] = "#000000"
+$social_network_colors[:meetup] = "#e51937"
+$social_network_colors[:reddit] = "#FF5700"
+$social_network_colors[:rss] = "#ff6600"
+$social_network_colors[:spotify] = "#00e461"
+
+$social_networks = []
+$social_network_colors.keys.each do |key|
+  $social_networks << key.to_s.delete(":")
+end
+
+puts ">> LOADED SOCIAL NETWORKS: #{$social_networks}"

+ 64 - 0
engine/config/locales/devise.en.yml

@@ -0,0 +1,64 @@
+# English strings for Devise (flash messages, mailer subjects, error messages).
+# Additional translations at https://github.com/plataformatec/devise/wiki/I18n
+
+en:
+
+  devise:
+    confirmations:
+      confirmed: "Your email address has been successfully confirmed."
+      send_instructions: "You will receive an email with instructions for how to confirm your email address in a few minutes."
+      send_paranoid_instructions: "If your email address exists in our database, you will receive an email with instructions for how to confirm your email address in a few minutes."
+    failure:
+      already_authenticated: "You are already signed in."
+      inactive: "Your account is not activated yet."
+      invalid: "Invalid %{authentication_keys} or password."
+      locked: "Your account is locked."
+      last_attempt: "You have one more attempt before your account is locked."
+      not_found_in_database: "Invalid %{authentication_keys} or password."
+      timeout: "Your session expired. Please sign in again to continue."
+      unauthenticated: "You need to sign in or sign up before continuing."
+      unconfirmed: "You have to confirm your email address before continuing."
+    mailer:
+      confirmation_instructions:
+        subject: "Confirmation instructions"
+      reset_password_instructions:
+        subject: "Reset password instructions"
+      unlock_instructions:
+        subject: "Unlock instructions"
+      password_change:
+        subject: "Password Changed"
+    omniauth_callbacks:
+      failure: "Could not authenticate you from %{kind} because \"%{reason}\"."
+      success: "Successfully authenticated from %{kind} account."
+    passwords:
+      no_token: "You can't access this page without coming from a password reset email. If you do come from a password reset email, please make sure you used the full URL provided."
+      send_instructions: "You will receive an email with instructions on how to reset your password in a few minutes."
+      send_paranoid_instructions: "If your email address exists in our database, you will receive a password recovery link at your email address in a few minutes."
+      updated: "Your password has been changed successfully. You are now signed in."
+      updated_not_active: "Your password has been changed successfully."
+    # NOTE(review): 'HIDE' (here and under registrations.signed_up) looks like
+    # a sentinel that the UI is expected to suppress rather than display —
+    # confirm against the code that renders flash messages.
+    sessions:
+      user:
+        signed_in: 'HIDE'
+      admin:
+        signed_in: 'HIDE'
+    registrations:
+      destroyed: "Bye! Your account has been successfully cancelled. We hope to see you again soon."
+      signed_up: "HIDE"
+      signed_up_but_inactive: "You have signed up successfully. However, we could not sign you in because your account is not yet activated."
+      signed_up_but_locked: "You have signed up successfully. However, we could not sign you in because your account is locked."
+      signed_up_but_unconfirmed: "A message with a confirmation link has been sent to your email address. Please follow the link to activate your account."
+      update_needs_confirmation: "You updated your account successfully, but we need to verify your new email address. Please check your email and follow the confirm link to confirm your new email address."
+      updated: "Your account has been updated successfully."
+    unlocks:
+      send_instructions: "You will receive an email with instructions for how to unlock your account in a few minutes."
+      send_paranoid_instructions: "If your account exists, you will receive an email with instructions for how to unlock it in a few minutes."
+      unlocked: "Your account has been unlocked successfully. Please sign in to continue."
+  # Validation error fragments; %{...} placeholders are filled in at runtime.
+  errors:
+    messages:
+      already_confirmed: "was already confirmed, please try signing in"
+      confirmation_period_expired: "needs to be confirmed within %{period}, please request a new one"
+      expired: "has expired, please request a new one"
+      not_found: "not found"
+      not_locked: "was not locked"
+      not_saved:
+        one: "1 error prohibited this %{resource} from being saved:"
+        other: "%{count} errors prohibited this %{resource} from being saved:"

+ 31 - 0
engine/config/locales/devise_invitable.en.yml

@@ -0,0 +1,31 @@
+# English strings for the invitation flow (flash messages, form labels and
+# the invitation email).
+en:
+  devise:
+    failure:
+      invited: "You have a pending invitation, accept it to finish creating your account."
+    invitations:
+      send_instructions: "An invitation email has been sent to %{email}."
+      invitation_token_invalid: "The invitation token provided is not valid!"
+      updated: "Your password was set successfully. You are now signed in."
+      updated_not_active: "Your password was set successfully."
+      no_invitations_remaining: "No invitations remaining"
+      invitation_removed: "Your invitation was removed."
+      new:
+        header: "Send invitation"
+        submit_button: "Send an invitation"
+      edit:
+        header: "Set your password"
+        submit_button: "Set my password"
+    mailer:
+      invitation_instructions:
+        subject: "Invitation instructions"
+        hello: "Hello %{email}"
+        someone_invited_you: "Someone has invited you to %{url}, you can accept it through the link below."
+        accept: "Accept invitation"
+        accept_until: "This invitation will be due in %{due_date}."
+        ignore: "If you don't want to accept the invitation, please ignore this email.<br />\nYour account won't be created until you access the link above and set your password."
+  # strftime-style pattern for the invitation due date.
+  time:
+    formats:
+      devise:
+        mailer:
+          invitation_instructions:
+            accept_until_format: "%B %d, %Y %I:%M %p"

+ 64 - 0
engine/config/locales/en.yml

@@ -0,0 +1,64 @@
+
+# English application strings: subscription/billing texts under `koudoku` and
+# activity labels under `activity`.
+en:
+  hello: "Hello world"
+  koudoku:
+    confirmations:
+      feature_depends_on_devise: "This feature depends on prior authentication."
+      subscription_cancelled: "You successfully cancelled your subscription."
+      subscription_updated: "You successfully updated your subscription."
+      subscription_upgraded: "You successfully upgraded your subscription!"
+    failure:
+      problem_processing_transaction: "There was a problem processing this transaction."
+      unauthorized: "Unauthorized"
+    plan_intervals:
+      month: "month"
+      year: "year"
+      week: "week"
+      6month: "half-year"
+      3month: "quarter"
+    plan_difference:
+      downgrade: "Downgrade"
+      selected: "Selected"
+      start_trial: "Start Trial"
+      upgrade: "Upgrade"
+    payment:
+      cancel: "Cancel"
+      card_number: "Card Number"
+      card_on_file_with_last_four: "The card on file for your account ends with %{last_four}."
+      cvc: "CVC"
+      expiration: "Expiration (MM/YYYY)"
+      payment_information: "Payment Information"
+      save_billing_info: "Save Billing Information"
+      upgrade_account: "Upgrade Your Account"
+      update_payment_information: "Update Your Payment Information"
+    # Testimonials; each quote carries its own literal quotation marks.
+    social_proof:
+      testimonial1:
+        person: "John Terry at BeeHive"
+        quote: "\"A bargain for the time I saved!\""
+      testimonial2:
+        person: "Michael Cornell at Truckers Inc."
+        quote: "\"This is a great complement to SalesForce!\""
+      testimonial3:
+        person: "Randy Blare at Burman Bros."
+        quote: "\"I love you guys and you're the best!\""
+    subscriptions:
+      cancel_your_subscription: "cancel your subscription"
+      cancel_your_subscription_note_html: "You can also %{link}."
+      choose_other_plan: "Choose Another Plan"
+      choose_plan: "Choose A Plan"
+      free_trial_for_days: "%{days}-day Free Trial"
+      no_subscription: "No Subscription"
+      not_subscribed_to_plan: "You are not subscribed to a paid plan."
+      sign_up: "Sign up"
+      start_trial: "Start Your Free Trial"
+      subscribed: "You're Subscribed!"
+      subscribed_to_plan: "You're currently subscribed to the %{plan} plan."
+      upgrade_your_account: "Upgrade Your Account"
+      which_plan_is_best: "What Plan Is Best For You?"
+  # Labels for activity entries, keyed by record type and action.
+  activity:
+    website:
+      create: 'target acquired'
+      update: 'target engaged'
+      destroy: 'target destroyed'
+    user:
+      create: 'joined ContactRocket'

+ 6 - 0
engine/config/routes.rb

@@ -0,0 +1,6 @@
+
+
+# No routes are defined for this application.
+Rails.application.routes.draw do
+
+end
+

+ 4 - 0
engine/config/schedule.rb

@@ -0,0 +1,4 @@
+# Use this file to easily define all of your cron jobs.
+#
+# It's helpful, but not entirely necessary to understand cron before proceeding.
+# http://en.wikipedia.org/wiki/Cron

+ 15 - 0
engine/config/secrets.yml

@@ -0,0 +1,15 @@
+---
+# Per-environment secrets. Values for staging and production are filled in
+# from environment variables through the ERB tags below.
+
+# `default` anchors an explicit empty mapping so that the `<<: *default`
+# merges below receive a mapping; a merge key needs a mapping (or a sequence
+# of mappings), not a null value.
+development: &default {}
+
+test: {}
+
+staging:
+  <<: *default
+  secret_key_base: <%= ENV['SECRET_KEY_BASE'] %>
+  secret_token: <%= ENV['SECRET_TOKEN'] %>
+
+production:
+  <<: *default
+  secret_key_base: <%= ENV['SECRET_KEY_BASE'] %>
+  secret_token: <%= ENV['SECRET_TOKEN'] %>

+ 25 - 0
engine/config/sidekiq.yml

@@ -0,0 +1,25 @@
+# Sidekiq settings. The top-level values apply everywhere; each environment
+# section below lists its own concurrency and queues.
+:concurrency: 10
+:timeout: 60
+
+# Queue entries are either [name, weight] pairs or a bare queue name.
+development:
+  :concurrency: 10
+  :queues:
+    - [default, 4]
+    - [crawler, 2]
+    - searchkick
+    - text
+
+test:
+  :concurrency: 10
+  :queues:
+    - [default, 4]
+    - [crawler, 2]
+    - searchkick
+    - text
+
+# Production additionally processes the `validation` queue.
+production:
+  :concurrency: 10
+  :queues:
+    - [default, 4]
+    - [crawler, 2]
+    - searchkick
+    - validation
+    - text

Some files were not shown because too many files changed in this diff