# cassandra_store_spec.rb (5.1 KB)
  1. # encoding: UTF-8
  2. require 'cassandra'
  3. require 'logger'
  4. require 'polipus-cassandra'
  5. require 'spec_helper'
  6. describe Polipus::Storage::CassandraStore do
  7. before(:all)do
  8. @logger = Logger.new(STDOUT).tap { |logger| logger.level = Logger::WARN }
  9. @cluster = Cassandra.cluster hosts: ['127.0.0.1'], logger: @logger
  10. @keyspace = 'polipus_cassandra_test'
  11. @table = 'cassandra_store_test'
  12. @storage = Polipus::Storage::CassandraStore.new(
  13. cluster: @cluster,
  14. keyspace: @keyspace,
  15. table: @table,
  16. )
  17. @storage.keyspace!
  18. @storage.table!
  19. @storage_without_code_and_body = Polipus::Storage::CassandraStore.new(
  20. cluster: @cluster,
  21. keyspace: @keyspace,
  22. table: @table,
  23. except: ['code', 'body']
  24. )
  25. end
  26. after(:all) do
  27. @storage.clear
  28. end
  29. it 'should store a page' do
  30. p = page_factory 'http://www.google.com'
  31. uuid = @storage.add p
  32. expect(uuid).to eq('ed646a3334ca891fd3467db131372140')
  33. p = @storage.get p
  34. expect(p.url.to_s).to eq('http://www.google.com')
  35. expect(p.body).to eq('<html></html>')
  36. end
  37. it 'should store all the relevant data from the page' do
  38. url = "http://www.duckduckgo.com"
  39. referer = "http://www.actually.nowhere.com"
  40. redirectto = "#{url}/your_super_awesome_results?page=42"
  41. now = Time.now.to_i
  42. p = page_factory(
  43. url,
  44. {
  45. referer: referer,
  46. redirect_to: redirectto,
  47. fetched_at: now
  48. })
  49. uuid = @storage.add p
  50. expect(uuid).to eq('3cd657f53c74f22c1a21b420ce3863fd')
  51. p = @storage.get p
  52. expect(p.url.to_s).to eq(url)
  53. expect(p.referer.to_s).to eq(referer)
  54. expect(p.redirect_to.to_s).to eq(redirectto)
  55. expect(p.fetched_at).to eq(now)
  56. expect(p.body).to eq('<html></html>')
  57. # for the sake of the other tests...
  58. expect(@storage.remove(p)).to be_truthy
  59. end
  60. it 'should update a page' do
  61. p = page_factory 'http://www.google.com', code: 301
  62. @storage.add p
  63. p = @storage.get p
  64. expect(p.code).to eq(301)
  65. end
  66. it 'should iterate over stored pages' do
  67. @storage.each do |k, page|
  68. expect(k).to eq('ed646a3334ca891fd3467db131372140')
  69. expect(page.url.to_s).to eq('http://www.google.com')
  70. end
  71. end
  72. it 'should delete a page' do
  73. p = page_factory 'http://www.google.com', code: 301
  74. @storage.remove p
  75. expect(@storage.get(p)).to be_nil
  76. end
  77. it 'should store a page removing a query string from the uuid generation' do
  78. p = page_factory 'http://www.asd.com/?asd=lol'
  79. p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1'
  80. @storage.include_query_string_in_uuid = false
  81. @storage.add p
  82. expect(@storage.exists?(p_no_query)).to be_truthy
  83. @storage.remove p
  84. end
  85. it 'should store a page removing a query string from the uuid generation no ending slash' do
  86. p = page_factory 'http://www.asd.com?asd=lol'
  87. p_no_query = page_factory 'http://www.asd.com'
  88. @storage.include_query_string_in_uuid = false
  89. @storage.add p
  90. expect(@storage.exists?(p_no_query)).to be_truthy
  91. @storage.remove p
  92. end
  93. it 'should store a page with user data associated' do
  94. p = page_factory 'http://www.user.com'
  95. p.user_data.name = 'Test User Data'
  96. @storage.add p
  97. expect(@storage.exists?(p)).to be_truthy
  98. p = @storage.get(p)
  99. expect(p.user_data.name).to eq('Test User Data')
  100. @storage.remove p
  101. end
  102. it 'should honor the except parameters' do
  103. pag = page_factory 'http://www.user-doo.com'
  104. expect(pag.code).to eq(200)
  105. expect(pag.body).to eq('<html></html>')
  106. @storage_without_code_and_body.add(pag)
  107. pag = @storage_without_code_and_body.get(pag)
  108. expect(pag.body).to be_nil
  109. expect(pag.code).to eq(0)
  110. @storage_without_code_and_body.remove(pag)
  111. end
  112. it 'should return false if a doc not exists' do
  113. @storage.include_query_string_in_uuid = false
  114. p_other = page_factory 'http://www.asdrrrr.com'
  115. expect(@storage.exists?(p_other)).to be_falsey
  116. @storage.add p_other
  117. expect(@storage.exists?(p_other)).to be_truthy
  118. p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol'
  119. expect(@storage.exists?(p_other)).to be_truthy
  120. @storage.include_query_string_in_uuid = true
  121. expect(@storage.exists?(p_other)).to be_falsey
  122. @storage.include_query_string_in_uuid = false
  123. @storage.remove p_other
  124. end
  125. it 'should set page.fetched_at based on the id creation' do
  126. p = page_factory 'http://www.user-doojo.com'
  127. @storage.add p
  128. expect(p.fetched_at).to be_nil
  129. p = @storage.get p
  130. expect(p.fetched_at).not_to be_nil
  131. @storage.remove p
  132. end
  133. it 'should NOT set page.fetched_at if already present' do
  134. p = page_factory 'http://www.user-doojooo.com'
  135. p.fetched_at = 10
  136. @storage.add p
  137. p = @storage.get p
  138. expect(p.fetched_at).to be 10
  139. @storage.remove p
  140. end
  141. it 'should store two pages and the count will be two' do
  142. pages = ['http://www.google.com', 'http://www.duckduckgo.com'].map do |url|
  143. page_factory(url).tap do |page|
  144. @storage.add(page)
  145. end
  146. end
  147. expect(@storage.count).to be 2
  148. pages.each do |page|
  149. @storage.remove(page)
  150. end
  151. expect(@storage.count).to be 0
  152. end
  153. end