# elasticsearch_store_spec.rb (5.2 KB)
  # encoding: UTF-8
  require 'logger'
  require 'polipus-elasticsearch'
  require 'spec_helper'

  # Integration spec for Polipus::Storage::ElasticSearchStore.
  #
  # Every example goes through the Elasticsearch client built in the `before`
  # hook (host 127.0.0.1) and works on the 'polipus_elasticsearch_test' index,
  # which the `after` hook drops again through the store.
  #
  # NOTE(review): `page_factory` is not defined in this file; presumably it is
  # provided by spec_helper -- confirm. The examples rely on it building a page
  # whose body is '<html></html>' and whose code is 200 unless overridden.
  describe Polipus::Storage::ElasticSearchStore do
    before(:each) do
      @logger = Logger.new(STDOUT)
      @client = Elasticsearch::Client.new(host: '127.0.0.1', logger: @logger)
      @client.transport.logger.level = Logger::INFO
      @index_name = 'polipus_elasticsearch_test'

      # NOTE(review): `refresh: true` presumably makes writes visible to reads
      # immediately -- confirm against the store implementation.
      @storage = Polipus::Storage::ElasticSearchStore.new(
        @client,
        index_name: @index_name,
        refresh: true
      )

      # Second store on the same index that is told to leave out code and body.
      @storage_without_code_and_body = Polipus::Storage::ElasticSearchStore.new(
        @client,
        index_name: @index_name,
        except: %w(code body),
        refresh: true
      )
    end

    after(:each) do
      @storage.drop
    end

    it 'should store a page' do
      page = page_factory 'http://www.google.com'
      uuid = @storage.add(page)
      # The expected id is fixed for this URL (presumably a digest of it).
      expect(uuid).to eq('ed646a3334ca891fd3467db131372140')

      page = @storage.get(page)
      expect(page).not_to be_nil
      expect(page.url.to_s).to eq('http://www.google.com')
      expect(page.body).to eq('<html></html>')

      @storage.remove(page)
      page = @storage.get(page)
      expect(page).to be_nil
    end

    it 'should store all the relevant data from the page' do
      url = 'http://www.duckduckgo.com'
      referer = 'http://www.actually.nowhere.com'
      redirectto = "#{url}/your_super_awesome_results?page=42"
      now = Time.now.to_i

      page = page_factory(url, referer: referer, redirect_to: redirectto, fetched_at: now)
      uuid = @storage.add page
      expect(uuid).to eq('3cd657f53c74f22c1a21b420ce3863fd')

      page = @storage.get page
      expect(page.url.to_s).to eq(url)
      expect(page.referer.to_s).to eq(referer)
      expect(page.redirect_to.to_s).to eq(redirectto)
      expect(page.fetched_at).to eq(now)
      expect(page.body).to eq('<html></html>')

      # for the sake of the other tests...
      expect(@storage.remove(page)).to be_truthy
    end

    it 'should update a page' do
      page = page_factory 'http://www.google.com', code: 301
      @storage.add page
      page = @storage.get page
      expect(page.code).to eq(301)
    end

    it 'should iterate over stored pages' do
      stored = page_factory('http://www.google.com')
      @storage.add(stored)

      # Count the yields so the example cannot pass without the block running.
      visited = 0
      @storage.each do |key, page|
        visited += 1
        expect(key).to eq('ed646a3334ca891fd3467db131372140')
        expect(page.url.to_s).to eq('http://www.google.com')
      end
      expect(visited).to be > 0
    end

    it 'should delete a page' do
      page = page_factory 'http://www.google.com', code: 301
      @storage.remove page
      expect(@storage.get(page)).to be_nil
    end

    it 'should store a page removing a query string from the uuid generation' do
      page = page_factory 'http://www.asd.com/?asd=lol'
      same_path_other_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1'
      @storage.include_query_string_in_uuid = false

      @storage.add page
      # Same URL apart from the query string, so it must resolve to the same doc.
      expect(@storage.exists?(same_path_other_query)).to be_truthy
      @storage.remove page
    end

    it 'should store a page removing a query string from the uuid generation no ending slash' do
      page = page_factory 'http://www.asd.com?asd=lol'
      without_query = page_factory 'http://www.asd.com'
      @storage.include_query_string_in_uuid = false

      @storage.add page
      expect(@storage.exists?(without_query)).to be_truthy
      @storage.remove page
    end

    it 'should store a page with user data associated' do
      page = page_factory 'http://www.user.com'
      page.user_data.name = 'Test User Data'

      @storage.add page
      expect(@storage.exists?(page)).to be_truthy

      page = @storage.get(page)
      expect(page.user_data.name).to eq('Test User Data')
      @storage.remove page
    end

    it 'should honor the except parameters' do
      page = page_factory 'http://www.user-doo.com'
      expect(page.code).to eq(200)
      expect(page.body).to eq('<html></html>')

      @storage_without_code_and_body.add(page)
      page = @storage_without_code_and_body.get(page)
      # The excluded fields are expected back as nil (body) and 0 (code).
      expect(page.body).to be_nil
      expect(page.code).to eq(0)
      @storage_without_code_and_body.remove(page)
    end

    it 'should return false if a doc not exists' do
      @storage.include_query_string_in_uuid = false
      other = page_factory 'http://www.asdrrrr.com'
      expect(@storage.exists?(other)).to be_falsey

      @storage.add other
      expect(@storage.exists?(other)).to be_truthy

      # With query strings ignored, the tracked URL matches the stored page...
      other = page_factory 'http://www.asdrrrr.com?trk=asd-lol'
      expect(@storage.exists?(other)).to be_truthy

      # ...and stops matching once the query string is part of the uuid again.
      @storage.include_query_string_in_uuid = true
      expect(@storage.exists?(other)).to be_falsey

      @storage.include_query_string_in_uuid = false
      @storage.remove other
    end

    it 'should set page.fetched_at based on the id creation' do
      page = page_factory 'http://www.user-doojo.com'
      @storage.add page
      expect(page.fetched_at).not_to be_nil

      page = @storage.get page
      expect(page.fetched_at).not_to be_nil
      @storage.remove page
    end

    it 'should NOT set page.fetched_at if already present' do
      page = page_factory 'http://www.user-doojooo.com'
      page.fetched_at = 10
      @storage.add page

      page = @storage.get page
      expect(page.fetched_at).to be(10)
      @storage.remove page
    end

    it 'should store two pages and the count will be two' do
      urls = ['http://www.google.com', 'http://www.duckduckgo.com']
      pages = urls.map do |url|
        page_factory(url).tap { |page| @storage.add(page) }
      end
      expect(@storage.count).to be(2)

      pages.each { |page| @storage.remove(page) }
      expect(@storage.count).to be(0)
    end
  end