From bc9e991b8665fdd8f77a257c5381cf70d015a6ec Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Sun, 25 Mar 2007 18:04:12 +0000 Subject: Utils: fix ircify_html (the final strip! could cause it to return nil) and improve whitespace handling --- lib/rbot/core/utils/extends.rb | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rbot/core/utils/extends.rb b/lib/rbot/core/utils/extends.rb index c43f3f3b..7022fb91 100644 --- a/lib/rbot/core/utils/extends.rb +++ b/lib/rbot/core/utils/extends.rb @@ -51,22 +51,32 @@ class ::String ## Maybe make it configurable? # txt.gsub!(/<\/?a( [^>]*)?>/, "#{Reverse}") - # Paragraph and br tags are converted to whitespace. + # Paragraph and br tags are converted to whitespace txt.gsub!(/<\/?(p|br)\s*\/?\s*>/, ' ') txt.gsub!("\n", ' ') + txt.gsub!("\r", ' ') # All other tags are just removed txt.gsub!(/<[^>]+>/, '') + # Convert HTML entities. We do it now to be able to handle stuff + # such as &nbsp; + txt = Utils.decode_html_entities(txt) + # Remove double formatting options, since they only waste bytes txt.gsub!(/#{Bold}(\s*)#{Bold}/, '\1') txt.gsub!(/#{Underline}(\s*)#{Underline}/, '\1') + # Simplify whitespace that appears on both sides of a formatting option + txt.gsub!(/\s+(#{Bold}|#{Underline})\s+/, ' \1') + txt.sub!(/\s+(#{Bold}|#{Underline})\z/, '\1') + txt.sub!(/\A(#{Bold}|#{Underline})\s+/, '\1') + # And finally whitespace is squeezed txt.gsub!(/\s+/, ' ') # Decode entities and strip whitespace - return Utils.decode_html_entities(txt).strip! + return txt.strip end # This method will strip all HTML crud from the receiver -- cgit v1.2.3