summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2008-03-23 11:31:58 +0100
committer Giuseppe Bilotta <giuseppe.bilotta@gmail.com> 2008-03-23 11:31:58 +0100
commit ee04b98df5b92e7683710cfcb4147d8355d1a525 (patch)
tree c386a3822b9b8c3b25ae6bc1f6be68687a9643e8
parent 620239d6bb3ad85c3f6af4818989b0e0f7e7e631 (diff)
rss plugin: check for <channel when <rdf:RDF is found
RSS htmlinfo filter was catching some false positives because some web pages have embedded RDF resources (e.g. for trackbacks) even though they are not RSS feeds. So check for the obligatory channel tag when an rdf:RDF tag is found.
-rw-r--r-- data/rbot/plugins/rss.rb 3
1 file changed, 2 insertions, 1 deletion
diff --git a/data/rbot/plugins/rss.rb b/data/rbot/plugins/rss.rb
index 8bf59dfc..bc4fd369 100644
--- a/data/rbot/plugins/rss.rb
+++ b/data/rbot/plugins/rss.rb
@@ -370,7 +370,8 @@ class RSSFeedsPlugin < Plugin
def htmlinfo_filter(s)
return nil unless s[:headers] and s[:headers]['x-rbot-location']
return nil unless s[:headers]['content-type'].first.match(/xml|rss|atom|rdf/i) or
- s[:text].include?("<rdf:RDF") or s[:text].include?("<rss") or s[:text].include?("<feed") or
+ (s[:text].include?("<rdf:RDF") and s[:text].include?("<channel")) or
+ s[:text].include?("<rss") or s[:text].include?("<feed") or
s[:text].match(FEED_NS)
blob = RssBlob.new(s[:headers]['x-rbot-location'],"", :htmlinfo)
unless (fetchRss(blob, nil) and parseRss(blob, nil) rescue nil)