ebay_parser.rb 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. #
  2. # ebay_parser.rb
  3. # MacBidwatcher
  4. #
  5. # Created by Morgan Schweers on 7/24/11.
  6. # Copyright 2011 CyberFOX Software, Inc. All rights reserved.
  7. #
  8. require 'parser'
  # Core extension so nil answers to_nil just like String and Array do.
  class NilClass
    # @return [nil] always nil
    def to_nil
      nil
    end
  end
  # Core extension: collapse an empty string to nil so callers can chain `||`.
  class String
    # @return [String, nil] the receiver itself, or nil when it is empty
    def to_nil
      empty? ? nil : self
    end
  end
  # Core extension: collapse an empty array to nil so callers can chain `||`.
  class Array
    # @return [Array, nil] the receiver itself, or nil when it is empty
    def to_nil
      empty? ? nil : self
    end
  end
  12. class Ebay
  13. class Parser < ::Parser
  # Pattern lists handed to match_set. The methods in this class index the
  # result positionally, one entry per pattern listed here.
  # NOTE(review): match_set is not defined in this file (presumably inherited
  # from ::Parser) -- confirm how it treats String entries versus Regexp entries.

  # A price label followed by a piece of text containing at least one digit.
  PRICE_MATCH = [
    %r{(Discounted.price|(Current.bid|Winning.bid|Starting.bid|Price|Sold.[Ff]or):)},
    %r{.*[0-9].*}
  ]

  # Case-insensitive "item location:" label followed by any text.
  LOCATION_MATCH = [
    %r{(?i)item.location:},
    %r{.*}
  ]

  # Parenthesised end date: opens with "(" and closes with a PST/PDT/GMT zone and ")".
  END_DATE_MATCH = [
    %r{^\(.*$},
    %r{^.*(P[SD]T|GMT)\)$}
  ]

  # "Ended:" label, then two pieces of text, the second ending in a time zone.
  ENDED_DATE_MATCH = [
    %r{^Ended:$},
    %r{.*},
    %r{.*(P[SD]T|GMT)$}
  ]

  # "Shipping:" label followed by any text.
  SHIPPING_MATCH = [
    %r{^Shipping:$},
    %r{.*}
  ]

  # Item number in label-then-digits order, and the reverse order.
  ITEM_MATCH = [
    %r{^Item.number:},
    %r{[0-9]+}
  ]
  REVERSE_ITEM_MATCH = [
    %r{[0-9]+},
    %r{Item.number:}
  ]

  # Bid history with the count and the word "bid(s)" as separate entries...
  HISTORY_MATCH = [
    %r{^Bid.history:$},
    %r{[0-9]+},
    %r{bids?}
  ]

  # ...or with the count and "bid(s)" in a single entry.
  ENDED_HISTORY_MATCH = [
    %r{^Bid.history:$},
    %r{[0-9]+.bids?}
  ]

  # Price entries followed by a bracketed "N bid(s)" entry.
  LIVE_HISTORY_MATCH = PRICE_MATCH + [
    "\\[",
    %r{[0-9]+ bids?}
  ]

  # Bracketed bid count split into four entries: [ , digits , bid(s) , ].
  BIDCOUNT_MATCH = [
    "\\[",
    %r{\d+},
    %r{bids?},
    "\\]"
  ]

  # Seller name (index 1), feedback score (index 4) and a trailing entry that
  # starts with a number (used as the feedback percentage).
  LIVE_SELLER_MATCH = [
    %r{Member id},
    %r{.*},
    %r{\(},
    %r{Feedback Score.*},
    %r{[0-9]+},
    %r{\)},
    %r{[0-9.]+.*}
  ]

  # 'Seller:' followed by the first five live seller patterns, which shifts the
  # name to index 2 and the score to index 5.
  ENDED_SELLER_MATCH = ['Seller:'] + LIVE_SELLER_MATCH.first(5)
  # Finds the auction end date in the page text.
  #
  # Tries the parenthesised form (END_DATE_MATCH) first, joining the matched
  # entries with a space and dropping the first and last character (the
  # parentheses). Otherwise tries the "Ended:" form (ENDED_DATE_MATCH), joining
  # everything after the first entry (the label).
  #
  # @return [String, nil] the end date text, or nil when neither pattern set matches
  def parse_extended_end_date
    live_parts = match_set(END_DATE_MATCH)
    return live_parts.join(' ')[1..-2] if live_parts

    ended_parts = match_set(ENDED_DATE_MATCH)
    ended_parts && ended_parts[1..-1].join(' ')
  end
  # Extracts the number of bids, trying three page layouts in order:
  # HISTORY_MATCH (count is its own entry), LIVE_HISTORY_MATCH (count is the
  # first word of the fourth entry) and ENDED_HISTORY_MATCH (count is the first
  # word of the second entry).
  #
  # @return [String, nil] the bid count as text, or nil when no layout matches
  def parse_bid_count
    history = match_set(HISTORY_MATCH)
    return history[1] if history

    live = match_set(LIVE_HISTORY_MATCH)
    return live[3].split.first if live

    ended = match_set(ENDED_HISTORY_MATCH)
    ended[1].split.first if ended
  end
  # Derives the item title from the page title.
  #
  # Removes the matched eBay suffix (title_array[0]) from the page title. When
  # nothing is left, falls back to the content of the og:title meta tag.
  #
  # @param title [String] the raw page title
  # @param title_array [MatchData] match of the eBay title pattern; index 0 is the text to strip
  # @return [String, nil] the item title, or nil when the title is empty and
  #   the page has no og:title meta tag (previously this raised NoMethodError)
  def extract_title(title, title_array)
    stripped = title.gsub(title_array[0], '')
    return stripped unless stripped.empty?

    # attribute() answers nil when no matching meta tag exists, so guard
    # before asking for its value.
    og_title = @page.css('meta[property="og:title"]').attribute('content')
    og_title.value if og_title
  end
  # Finds the eBay item number.
  #
  # Prefers the digits-then-label form found in the page text
  # (REVERSE_ITEM_MATCH). When the page text has no such match, falls back to
  # the item number captured from the page title (title_array[1]) instead of
  # raising on a nil match.
  #
  # @param title_array [MatchData, nil] match of the eBay title pattern; index 1 is the item number, if captured
  # @return [String, nil] the item number, or nil when it cannot be found
  def extract_id(title_array)
    id_match = match_set(REVERSE_ITEM_MATCH)
    return id_match.first if id_match

    title_array[1] if title_array
  end
  # Older item-number lookup that tries four sources in order and stops at the
  # first one that yields a value:
  #   1. the item number captured from the page title (title_array[1])
  #   2. the label-then-digits form in the page text (ITEM_MATCH)
  #   3. the digits-then-label form in the page text (REVERSE_ITEM_MATCH)
  #   4. the text of any span containing "Item number"
  #
  # @param title_array [MatchData] match of the eBay title pattern
  # @return [String, nil] the item number, or nil when no source has one
  #   (previously step 4 raised NoMethodError when its pattern did not match)
  def old_extract_id(title_array)
    id = title_array[1]

    unless id
      id_match = match_set(ITEM_MATCH)
      id = id_match.last if id_match
    end

    unless id
      id_match = match_set(REVERSE_ITEM_MATCH)
      id = id_match.first if id_match
    end

    unless id
      span_text = @page.css('span:contains("Item number")').text.to_s
      potential_id = span_text.match(%r{Item.number:?.(\d+)})
      # match answers nil when the span text has no item number.
      id = potential_id[1] if potential_id
    end

    id
  end
  # Recognises an eBay item page by its title and pulls out what the title gives us.
  #
  # Two title shapes are accepted: one containing " | eBay", and one containing
  # "- eBay (item NNN end time ...)" which also carries the item number and end time.
  #
  # @return [Hash, nil] :title, :id and :ends_at (ends_at is nil for the
  #   " | eBay" shape), or nil when the page has no title or the title is not
  #   in a recognised shape
  def parse_title
    title = @page.title
    # A page without a title cannot be recognised; previously this raised.
    return nil unless title

    title_array = title.match(%r{(?: \| eBay)|- eBay \(item ([0-9]+) end time *([^)]+)\)})
    return nil if title_array.nil?

    { :title => extract_title(title, title_array),
      :id => extract_id(title_array),
      :ends_at => title_array[2] }
  end
  # Picks a thumbnail URL for the item.
  #
  # Uses the src of the first <img> whose src points at ebayimg.com. When there
  # is none, falls back to the content of the og:image meta tag.
  #
  # @return [String, nil] the thumbnail URL, or nil when the page has neither
  #   (previously a missing og:image tag raised NoMethodError)
  def extract_thumbnail
    # The dot is escaped so only the literal host "ebayimg.com" qualifies.
    ebay_image = (@page / 'img').find { |node| node && node[:src] =~ /ebayimg\.com/ }
    return ebay_image[:src] if ebay_image

    # attribute() answers nil when no matching meta tag exists, so guard
    # before asking for its value.
    og_image = @page.css('meta[property="og:image"]').attribute('content')
    og_image.value if og_image
  end
  97. # @return [Array[feedback_percent, feedback_score, seller_name]] The seller-related information; feedback_percent may be nil if the item is ended.
  def extract_seller_info
    # Live listing: name at index 1, score at index 4, and the last entry holds
    # the percentage (everything before its '%').
    live = match_set(LIVE_SELLER_MATCH)
    if live
      percent = live.last.split('%').first
      return [percent, live[4], live[1]]
    end

    # Ended listing: the leading 'Seller:' entry shifts the name to index 2 and
    # the score to index 5; no percentage is available.
    ended = match_set(ENDED_SELLER_MATCH)
    ended ? [nil, ended[5], ended[2]] : [nil, nil, nil]
  end
  111. # @return [Hash] Parsed data from the auction as a key/value hash, or nil if the title isn't recognizable.
  def parse
    title_info = parse_title
    return nil unless title_info

    thumbnail_url = extract_thumbnail

    # Price from the labelled page text, else from the itemprop="price" span.
    price = match_set(PRICE_MATCH).to_a.last.to_nil
    price ||= @page.css('span[itemprop="price"]').text

    location = match_set(LOCATION_MATCH).to_a.last
    shipping = match_set(SHIPPING_MATCH).to_a.last

    # The end time from the title wins; otherwise look for it in the page text.
    ends_at = title_info[:ends_at]
    ends_at ||= parse_extended_end_date

    bid_count = parse_bid_count
    us_price = @page.css("[itemprop='offers'] .convPrice #bidPrice").text

    # Last resort for the bid count: the bracketed "[ N bids ]" form.
    bracketed = bid_count ? nil : match_set(BIDCOUNT_MATCH)
    bid_count = bracketed[1] if bracketed

    feedback_percent, feedback_score, seller_name = extract_seller_info
    seller = {
      :seller_name => seller_name.to_s.strip,
      :feedback => feedback_score.to_s.strip,
      :feedback_percent => feedback_percent.to_s.strip
    }

    auction = {
      :title => title_info[:title].to_s.strip,
      :location => location.to_s.strip,
      :current_price => price,
      :us_price => us_price,
      :end_date => ends_at,
      :shipping => shipping,
      :bid_count => bid_count,
      :identifier => title_info[:id].to_s.strip,
      :seller => seller
    }
    # The thumbnail is stored under a String key, unlike the Symbol keys above.
    auction['thumbnail_url'] = thumbnail_url
    auction
  end
  143. end
  144. end