page.rb 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. require 'elasticsearch/model'
  # Fall back to one shard and zero replicas unless the environment already
  # provides a value for the corresponding variable.
  {
    'POLIPUS_ELASTICSEARCH_INDEX_SHARDS' => '1',
    'POLIPUS_ELASTICSEARCH_INDEX_REPLICAS' => '0'
  }.each do |variable, fallback|
    ENV[variable] = ENV.fetch(variable, fallback)
  end
  4. module Polipus
  5. module ElasticSearch
  6. class Page
  include Elasticsearch::Model

  # Index name used when no other name is configured.
  DEFAULT_INDEX_NAME = 'polipus-pages'

  document_type 'polipus_page'
  index_name DEFAULT_INDEX_NAME

  # Shard and replica counts are read from the environment when this class
  # body is evaluated.
  settings(
    index: {
      number_of_shards: ENV['POLIPUS_ELASTICSEARCH_INDEX_SHARDS'].to_i,
      number_of_replicas: ENV['POLIPUS_ELASTICSEARCH_INDEX_REPLICAS'].to_i
    }
  )

  # Field declarations, registered in the order listed below. The `_all`
  # option is passed as disabled.
  mapping(_all: { enabled: false }) do
    {
      id: { index: :not_analyzed },
      body: { type: :string },
      code: { type: :integer },
      depth: { type: :integer },
      error: { type: :string },
      fetched: { type: :boolean },
      fetched_at: { type: :integer },
      headers: { type: :string },
      links: { type: :string },
      redirect_to: { type: :string },
      referer: { type: :string },
      response_time: { type: :integer },
      url: { type: :string },
      user_data: { type: :string }
    }.each do |field, options|
      indexes(field, **options)
    end
  end
  # Returns the client held by the model's `__elasticsearch__` proxy.
  def self.client
    proxy = __elasticsearch__
    proxy.client
  end
  # Asks the client for a count scoped to the current index and document type.
  #
  # @return [Integer] the response's 'count' entry converted with `to_i`
  def self.count
    es = client
    response = es.count(index: index_name, type: document_type)
    response['count'].to_i
  end
  # Creates the index through the `__elasticsearch__` proxy.
  #
  # @param name [String, nil] when given, becomes the model's index name
  #   before the index is created; when nil (the default) the currently
  #   configured index name is used
  # @return whatever the proxy's `create_index!` returns
  def self.create_index!(name = nil)
    index_name(name) unless name.nil?
    __elasticsearch__.create_index!(index: index_name)
  end
  # Issues a delete-by-query with a match_all query against the current index.
  #
  # @return whatever the client's `delete_by_query` returns
  def self.clear_index!
    es = client
    everything = { query: { match_all: {} } }
    es.delete_by_query(index: index_name, body: everything)
  end
  # Deletes the current index through the client's indices API.
  #
  # @return whatever `indices.delete` returns
  def self.delete_index!
    indices = client.indices
    indices.delete(index: index_name)
  end
  # Asks the client whether a document with the given id exists in the
  # current index under the model's document type.
  #
  # @param id the document id to look up
  # @return whatever the client's `exists?` returns
  def self.exists?(id)
    es = client
    es.exists?(index: index_name, type: document_type, id: id)
  end
  # Fetches the stored source of a document.
  #
  # @param id the document id
  # @return the client's `get_source` result, or nil when `exists?(id)` is
  #   falsy (the existence check runs first, as a separate client call)
  def self.get(id)
    if exists?(id)
      es = client
      es.get_source(index: index_name, type: document_type, id: id)
    end
  end
  # Asks the client's indices API whether the current index exists.
  #
  # @return whatever `indices.exists?` returns
  def self.index_exists?
    indices = client.indices
    indices.exists?(index: index_name)
  end
  # Builds the hash that gets indexed for +obj+.
  #
  # Starts from a '_type' entry holding the model's document type, then adds
  # one entry per mapped property. Each value is read by calling the method
  # of that name when +obj+ responds to it, otherwise via +obj[name]+.
  # Entries whose value is nil are left out of the result; false is kept.
  #
  # @param obj an object exposing the properties as methods or through #[]
  # @return [Hash] string-keyed document without nil values
  def self.process_document(obj)
    seed = { '_type' => document_type }
    filled = properties.each_with_object(seed) do |property, doc|
      field = property.to_s
      doc[field] = obj.respond_to?(field) ? obj.send(field) : obj[field]
    end
    filled.reject { |_, value| value.nil? }
  end
  # Lists the property names declared in the mapping for the model's
  # document type.
  #
  # @return [Array<String>] property names, in the order the mapping hash
  #   yields them
  def self.properties
    declared = mapping.to_hash[document_type.to_sym][:properties]
    declared.keys.map(&:to_s)
  end
  # Deletes a document by id when it exists.
  #
  # The delete request carries the current epoch second as its version, with
  # the external version type.
  #
  # @param id the document id
  # @param refresh [Boolean] forwarded to the client as the refresh option
  # @return the client's `delete` result, or nil when `exists?(id)` is falsy
  def self.remove(id, refresh = false)
    if exists?(id)
      es = client
      es.delete(
        index: index_name, type: document_type, id: id,
        refresh: refresh,
        version: Time.now.to_i, version_type: :external
      )
    end
  end
  # Points the model at a client and an index.
  #
  # @param client_ the client to assign to the `__elasticsearch__` proxy
  # @param index_name [String] index name to configure; defaults to
  #   DEFAULT_INDEX_NAME
  # @return whatever the `index_name` setter call returns
  def self.setup(client_, index_name = DEFAULT_INDEX_NAME)
    proxy = __elasticsearch__
    proxy.client = client_
    # The parameter shadows the class-level method of the same name, so the
    # explicit receiver is needed to call the method here.
    self.index_name(index_name)
  end
  # Indexes a document and returns its id.
  #
  # The input is first passed through `process_document`. The request uses
  # the processed document's 'id' as the document id and its 'fetched_at'
  # (converted with `to_i`) as the version, with the external version type.
  #
  # @param document the object to index
  # @param refresh [Boolean] forwarded to the client as the refresh option
  # @return the processed document's 'id' value
  def self.store(document, refresh = false)
    doc = process_document(document)
    es = client
    es.index(
      index: index_name, type: document_type, id: doc['id'],
      body: doc,
      refresh: refresh,
      version: doc['fetched_at'].to_i, version_type: :external
    )
    doc['id']
  end
  150. end
  151. end
  152. end