summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2007-03-05 09:47:01 +0000
committerGiuseppe Bilotta <giuseppe.bilotta@gmail.com>2007-03-05 09:47:01 +0000
commitd8a22eb45b2fd816319a58786d18cf00b4b16031 (patch)
tree823eff08f3c6cc46fa63e3f674df9e4902f3646e
parent7d4892880e67437fb515771525cd51f28eb4ded8 (diff)
Rework netmask/hostname detection code to work around non-RFC-compliant servers
-rw-r--r--lib/rbot/core/utils/extends.rb36
-rw-r--r--lib/rbot/irc.rb78
-rw-r--r--lib/rbot/rfc2812.rb7
3 files changed, 84 insertions, 37 deletions
diff --git a/lib/rbot/core/utils/extends.rb b/lib/rbot/core/utils/extends.rb
index e882148b..c43f3f3b 100644
--- a/lib/rbot/core/utils/extends.rb
+++ b/lib/rbot/core/utils/extends.rb
@@ -95,23 +95,9 @@ class ::Regexp
IN_ON = /in|on/
- # We start with some IRC related regular expressions, used to match
- # Irc::User nicks and Irc::Channel names
- #
- # For each of them we define three versions of the regular expression:
- # * a generic one, which should match for any server but may turn out to
- # match more than a specific server would accept
- # * an RFC-compliant matcher
- # * TODO a server-specific one that uses the Irc::Server#supports method to build
- # a matcher valid for a particular server.
- #
module Irc
- CHAN_FIRST = /[#&+]/
- CHAN_SAFE = /![A-Z0-9]{5}/
- CHAN_ANY = /[^\x00\x07\x0A\x0D ,:]/
- GEN_CHAN = /(?:#{CHAN_FIRST}|#{CHAN_SAFE})#{CHAN_ANY}+/
- RFC_CHAN = /#{CHAN_FIRST}#{CHAN_ANY}{1,49}|#{CHAN_SAFE}#{CHAN_ANY}{1,44}/
-
+ # Match a list of channel anmes separated by optional commas, whitespace
+ # and optionally the word "and"
CHAN_LIST = Regexp.new_list(GEN_CHAN)
# Match "in #channel" or "on #channel" and/or "in private" (optionally
@@ -126,28 +112,12 @@ class ::Regexp
IN_CHAN_LIST_PVT_SFX = Regexp.new_list(/#{GEN_CHAN}|here|private|pvt/, IN_ON)
IN_CHAN_LIST_PVT = /#{IN_ON}\s+#{IN_CHAN_LIST_PVT_SFX}|anywhere|everywhere/
- SPECIAL_CHAR = /[\x5b-\x60\x7b-\x7d]/
- NICK_FIRST = /#{SPECIAL_CHAR}|[[:alpha:]]/
- NICK_ANY = /#{SPECIAL_CHAR}|[[:alnum:]]|-/
- GEN_NICK = /#{NICK_FIRST}#{NICK_ANY}+/
- RFC_NICK = /#{NICK_FIRST}#{NICK_ANY}{0,8}/
-
# Match a list of nicknames separated by optional commas, whitespace and
# optionally the word "and"
- NICK_LIST = Regexp.new_list(GEN_CHAN)
+ NICK_LIST = Regexp.new_list(GEN_NICK)
end
- # Next, some general purpose ones
- DIGITS = /\d+/
- HEX_DIGIT = /[0-9A-Fa-f]/
- HEX_DIGITS = /#{HEX_DIGIT}+/
- HEX_OCTET = /#{HEX_DIGIT}#{HEX_DIGIT}?/
- DEC_OCTET = /[01]?\d?\d|2[0-4]\d|25[0-5]/
- DEC_IP_ADDR = /#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}/
- HEX_IP_ADDR = /#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}/
- IP_ADDR = /#{DEC_IP_ADDR}|#{HEX_IP_ADDR}/
-
end
diff --git a/lib/rbot/irc.rb b/lib/rbot/irc.rb
index a0ffbd91..1c9d4dcd 100644
--- a/lib/rbot/irc.rb
+++ b/lib/rbot/irc.rb
@@ -475,6 +475,82 @@ class ArrayOf < Array
end
+# We extend the Regexp class with an Irc module which will contain some
+# Irc-specific regexps
+#
+class Regexp
+
+ # We start with some general-purpose ones which will be used in the
+ # Irc module too, but are useful regardless
+ DIGITS = /\d+/
+ HEX_DIGIT = /[0-9A-Fa-f]/
+ HEX_DIGITS = /#{HEX_DIGIT}+/
+ HEX_OCTET = /#{HEX_DIGIT}#{HEX_DIGIT}?/
+ DEC_OCTET = /[01]?\d?\d|2[0-4]\d|25[0-5]/
+ DEC_IP_ADDR = /#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}.#{DEC_OCTET}/
+ HEX_IP_ADDR = /#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}.#{HEX_OCTET}/
+ IP_ADDR = /#{DEC_IP_ADDR}|#{HEX_IP_ADDR}/
+
+ # IPv6, from Resolv::IPv6, without the \A..\z anchors
+ HEX_16BIT = /#{HEX_DIGIT}{1,4}/
+ IP6_8Hex = /(?:#{HEX_16BIT}:){7}#{HEX_16BIT}/
+ IP6_CompressedHex = /((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)::((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)/
+ IP6_6Hex4Dec = /((?:#{HEX_16BIT}:){6,6})#{DEC_IP_ADDR}/
+ IP6_CompressedHex4Dec = /((?:#{HEX_16BIT}(?::#{HEX_16BIT})*)?)::((?:#{HEX_16BIT}:)*)#{DEC_IP_ADDR}/
+ IP6_ADDR = /(?:#{IP6_8Hex})|(?:#{IP6_CompressedHex})|(?:#{IP6_6Hex4Dec})|(?:#{IP6_CompressedHex4Dec})/
+
+ # We start with some IRC related regular expressions, used to match
+ # Irc::User nicks and users and Irc::Channel names
+ #
+ # For each of them we define two versions of the regular expression:
+ # * a generic one, which should match for any server but may turn out to
+ # match more than a specific server would accept
+ # * an RFC-compliant matcher
+ #
+ module Irc
+
+ # Channel-name-matching regexps
+ CHAN_FIRST = /[#&+]/
+ CHAN_SAFE = /![A-Z0-9]{5}/
+ CHAN_ANY = /[^\x00\x07\x0A\x0D ,:]/
+ GEN_CHAN = /(?:#{CHAN_FIRST}|#{CHAN_SAFE})#{CHAN_ANY}+/
+ RFC_CHAN = /#{CHAN_FIRST}#{CHAN_ANY}{1,49}|#{CHAN_SAFE}#{CHAN_ANY}{1,44}/
+
+ # Nick-matching regexps
+ SPECIAL_CHAR = /[\x5b-\x60\x7b-\x7d]/
+ NICK_FIRST = /#{SPECIAL_CHAR}|[[:alpha:]]/
+ NICK_ANY = /#{SPECIAL_CHAR}|[[:alnum:]]|-/
+ GEN_NICK = /#{NICK_FIRST}#{NICK_ANY}+/
+ RFC_NICK = /#{NICK_FIRST}#{NICK_ANY}{0,8}/
+
+ USER_CHAR = /[^\x00\x0a\x0d @]/
+ GEN_USER = /#{USER_CHAR}+/
+
+ # Host-matching regexps
+ HOSTNAME_COMPONENT = /[[:alnum:]](?:[[:alnum:]]|-)*[[:alnum:]]*/
+ HOSTNAME = /#{HOSTNAME_COMPONENT}(?:\.#{HOSTNAME_COMPONENT})*/
+ HOSTADDR = /#{IP_ADDR}|#{IP6_ADDR}/
+
+ GEN_HOST = /#{HOSTNAME}|#{HOSTADDR}/
+
+ # FreeNode network replaces the host of affiliated users with
+ # 'virtual hosts'
+ # FIXME we need the true syntax to match it properly ...
+ PDPC_HOST_PART = /[0-9A-Za-z.-]+/
+ PDPC_HOST = /#{PDPC_HOST_PART}(?:\/#{PDPC_HOST_PART})+/
+
+ # NOTE: the final optional and non-greedy dot is needed because some
+ # servers (e.g. FreeNode) send the hostname of the services as "services."
+ # which is not RFC compliant, but sadly done.
+ GEN_MASK_HOST = /#{PDPC_HOST}|#{GEN_HOST}\.??/
+
+ # Netmask-matching Regexp
+ GEN_MASK = /(#{GEN_NICK})(?:(?:!(#{GEN_USER}))?@(#{GEN_MASK_HOST}))?/
+ end
+
+end
+
+
module Irc
@@ -517,7 +593,7 @@ module Irc
# Now we can see if the given string _str_ is an actual Netmask
if str.respond_to?(:to_str)
case str.to_str
- when /^(?:(\S+?)(?:!(\S+)@(?:(\S+))?)?)?$/
+ when /^(?:#{Regexp::Irc::GEN_MASK})?$/
# We do assignment using our internal methods
self.nick = $1
self.user = $2
diff --git a/lib/rbot/rfc2812.rb b/lib/rbot/rfc2812.rb
index 97181b03..efa50035 100644
--- a/lib/rbot/rfc2812.rb
+++ b/lib/rbot/rfc2812.rb
@@ -902,13 +902,14 @@ module Irc
# This is not always true, though, since some servers do not send a
# full hostmask for user messages.
#
- if prefix =~ /^(?:\S+)(?:!\S+)?@(?:\S+)$/
+ if prefix =~ /^#{Regexp::Irc::GEN_MASK}$/
data[:source] = @server.user(prefix)
else
if @server.hostname
if @server.hostname != prefix
- debug "Origin #{prefix} for message\n\t#{serverstring.inspect}\nis neither a user hostmask nor the server hostname, assuming it's a nick"
- data[:source] = @server.user(prefix)
+ # TODO do we want to be able to differentiated messages that are passed on to us from /other/ servers?
+ debug "Origin #{prefix} for message\n\t#{serverstring.inspect}\nis neither a user hostmask nor the server hostname\nI'll pretend that it's from the server anyway"
+ data[:source] = @server
else
data[:source] = @server
end