Rich Brown
Joined: 08 Jul 2007 Posts: 75 Location: Hanover, NH
|
Posted: Thu Sep 25, 2008 11:24 am Post subject: A Regular Expression for IPv6 Addresses |
|
|
Stephen Ryan at Dartware has produced a regular expression (regex) that can be used to match any legal format of an IPv6 address. This is useful for determining whether a particular string is, in fact, a legal IPv6 address.
A quick search of Google for "IPv6 regex" or "regex for IPv6" gives lots of possibilities, many of which work for certain cases.
This regex for IPv6 differs from those others in that it handles all the cases specified by RFC4291, section 2.2, "Text Representation of Addresses", and in particular, it offers the ability to recognize an IPv4 dotted quad address at the end.
Here's the regex for IPv6 addresses. (Note: this should all be on one line.)
| Code: | /^\s*((([0-9A-Fa-f]{1,4}:){7}(([0-9A-Fa-f]{1,4})|:))|(([0-9A-Fa-f]{1,4}:){6}(:|((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(:[0-9A-Fa-f]{1,4})))|(([0-9A-Fa-f]{1,4}:){5}((:((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|((:[0-9A-Fa-f]{1,4}){1,2})))|(([0-9A-Fa-f]{1,4}:){4}(:[0-9A-Fa-f]{1,4}){0,1}((:((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|((:[0-9A-Fa-f]{1,4}){1,2})))|(([0-9A-Fa-f]{1,4}:){3}(:[0-9A-Fa-f]{1,4}){0,2}((:((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|((:[0-9A-Fa-f]{1,4}){1,2})))|(([0-9A-Fa-f]{1,4}:){2}(:[0-9A-Fa-f]{1,4}){0,3}((:((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|((:[0-9A-Fa-f]{1,4}){1,2})))|(([0-9A-Fa-f]{1,4}:)(:[0-9A-Fa-f]{1,4}){0,4}((:((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|((:[0-9A-Fa-f]{1,4}){1,2})))|(:(:[0-9A-Fa-f]{1,4}){0,5}((:((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|((:[0-9A-Fa-f]{1,4}){1,2})))|(((25[0-5]|2[0-4]\d|[01]?\d{1,2})(\.(25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))(%.+)?\s*$/;
|
The regex matches the following IPv6 address forms. Note that these are all the same address:
fe80:0000:0000:0000:0204:61ff:fe9d:f156 // full form of IPv6
fe80:0:0:0:204:61ff:fe9d:f156 // drop leading zeroes
fe80::204:61ff:fe9d:f156 // collapse multiple zeroes to :: in the IPv6 address
fe80:0000:0000:0000:0204:61ff:254.157.241.086 // IPv4 dotted quad at the end
fe80:0:0:0:204:61ff:254.157.241.86 // drop leading zeroes, IPv4 dotted quad at the end
fe80::204:61ff:254.157.241.86 // dotted quad at the end, multiple zeroes collapsed
In addition, the regular expression matches these IPv6 forms:
::1 // localhost
fe80:: // link-local prefix
2001:: // global unicast prefix
The attached Perl script tests the regular expression against sample IPv6 addresses, both good and bad. The program also compares against a couple of sample regexes for IPv6 that I found via a quick Google search, one of which was from somebody complaining about errors in other regular expressions found via quick Google searches. It prints a dot for each successfully matched (or not matched!) potential address, 1 if one of the other regexes failed it, or 2 if both failed it. It also prints a big nasty failure message if it matches when it shouldn't, or vice versa.
Of course, it's possible that we've missed a case here, so if you find a good counterexample, please send us a note at support@dartware.com!
| Code: | #! /usr/bin/perl
sub is_ipv6
{
    # Returns true when $addr is a textual IPv6 address in any of the forms
    # this script's test list expects: full form, "::" compression, or an
    # IPv4 dotted quad as the final 32 bits.  A bare IPv4 dotted quad is
    # accepted as well.  Leading/trailing whitespace and an optional
    # "%zone" suffix are tolerated.
    #
    # Parameters: $addr - the candidate string.
    # Returns:    the result of the pattern match (1 or the empty string in
    #             scalar context).
    #
    # Grammar (H = 1-4 hex digits, V4 = dotted quad with octets 0-255):
    #   (H:){7} ( H | : )
    #   (H:){6} ( :H | V4 | : )
    #   (H:){5} ( (:H){1,2} | :V4 | : )
    #   (H:){4} ( (:H){1,3} | (:H){0,1}:V4 | : )
    #   (H:){3} ( (:H){1,4} | (:H){0,2}:V4 | : )
    #   (H:){2} ( (:H){1,5} | (:H){0,3}:V4 | : )
    #   (H:)    ( (:H){1,6} | (:H){0,4}:V4 | : )
    #   :       ( (:H){1,7} | (:H){0,5}:V4 | : )
    #   V4
    #
    # The bare ":" tail is only allowed directly after the leading groups.
    # The earlier form "(:H){0,k}(:V4?)" also allowed it after trailing
    # groups, so strings such as "1::2:" or "::2:" were wrongly accepted.
    my($addr) = @_;

    # [0-9] rather than \d: \d can also match non-ASCII digits.
    my $h16       = qr/[0-9A-Fa-f]{1,4}/;
    my $dec_octet = qr/(?:25[0-5]|2[0-4][0-9]|[01]?[0-9]{1,2})/;
    my $ipv4      = qr/${dec_octet}(?:\.${dec_octet}){3}/;

    # Non-capturing groups throughout, so a successful match in list
    # context yields (1) and a failed one the empty list.
    return $addr =~ m{
        \A \s*
        (?:
            (?:${h16}:){7} (?: ${h16} | : )
          | (?:${h16}:){6} (?: :${h16} | ${ipv4} | : )
          | (?:${h16}:){5} (?: (?: :${h16} ){1,2} | :${ipv4} | : )
          | (?:${h16}:){4} (?: (?: :${h16} ){1,3} | (?: :${h16} ){0,1} :${ipv4} | : )
          | (?:${h16}:){3} (?: (?: :${h16} ){1,4} | (?: :${h16} ){0,2} :${ipv4} | : )
          | (?:${h16}:){2} (?: (?: :${h16} ){1,5} | (?: :${h16} ){0,3} :${ipv4} | : )
          | (?:${h16}:)    (?: (?: :${h16} ){1,6} | (?: :${h16} ){0,4} :${ipv4} | : )
          | :              (?: (?: :${h16} ){1,7} | (?: :${h16} ){0,5} :${ipv4} | : )
          | ${ipv4}
        )
        (?: % .+ )?
        \s* \z
    }x;
}
sub other_is_ipv6
{
# Comparison matcher #1: a third-party IPv6 regex, kept verbatim so that
# XTEST can count how often it disagrees with is_ipv6.  It is not the
# pattern under test and is deliberately left unmodified.
# Takes the candidate address string and returns the result of the match.
#
# This is a regex I found on the net. It differs in the "IPv4 address at the end" format, and I think is wrong.
# In particular, if I try to ping fe80::217:f2ff:254.7.237.98 (the IPv6 address of my Mac), it works, which means that ping6 on Ubuntu follows my interpretation of the RFC.
my($addr) = @_;
return $addr =~ /^\s*((([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}:[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){5}:([0-9A-Fa-f]{1,4}:)?[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){4}:([0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){3}:([0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){2}:([0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){6}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(([0-9A-Fa-f]{1,4}:){0,5}:((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|(::([0-9A-Fa-f]{1,4}:){0,5}((\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b)\.){3}(\b((25[0-5])|(1\d{2})|(2[0-4]\d)|(\d{1,2}))\b))|([0-9A-Fa-f]{1,4}::([0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})|(::([0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})|(([0-9A-Fa-f]{1,4}:){1,7}:))\s*$/;
}
sub other2_is_ipv6
{
# Comparison matcher #2: a second third-party IPv6 regex, kept verbatim so
# that XTEST can count how often it disagrees with is_ipv6.  It is not the
# pattern under test and is deliberately left unmodified.  Unlike the other
# matchers, every alternative here carries its own ^\s* ... \s*$ anchors.
# Takes the candidate address string and returns the result of the match.
#
# This is a regex I found on the net. It differs in the "IPv4 address at the end" format, and I think is wrong.
# In particular, if I try to ping fe80::217:f2ff:254.7.237.98 (the IPv6 address of my Mac), it works.
my($addr) = @_;
return $addr =~ /(^\s*([0-9A-Fa-f]{1,4}:){1,1}(:[0-9A-Fa-f]{1,4}){1,6}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,2}(:[0-9A-Fa-f]{1,4}){1,5}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,3}(:[0-9A-Fa-f]{1,4}){1,4}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,4}(:[0-9A-Fa-f]{1,4}){1,3}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,5}(:[0-9A-Fa-f]{1,4}){1,2}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,6}(:[0-9A-Fa-f]{1,4}){1,1}\s*$)|(^\s*(([0-9A-Fa-f]{1,4}:){1,7}|:):\s*$)|(^\s*:(:[0-9A-Fa-f]{1,4}){1,7}\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){6})(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})\s*$)|(^\s*(([0-9A-Fa-f]{1,4}:){5}[0-9A-Fa-f]{1,4}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3})\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){5}:[0-9A-Fa-f]{1,4}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,1}(:[0-9A-Fa-f]{1,4}){1,4}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,2}(:[0-9A-Fa-f]{1,4}){1,3}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,3}(:[0-9A-Fa-f]{1,4}){1,2}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)|(^\s*([0-9A-Fa-f]{1,4}:){1,4}(:[0-9A-Fa-f]{1,4}){1,1}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)|(^\s*(([0-9A-Fa-f]{1,4}:){1,5}|:):(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)|(^\s*:(:[0-9A-Fa-f]{1,4}){1,5}:(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\s*$)/
}
sub XTEST
{
    # Runs one test case and prints a one-character progress mark.
    #
    # Parameters: $bool - expected verdict (true if $addr should be valid)
    #             $addr - the candidate address string
    #
    # Output: when is_ipv6 agrees with $bool, prints "." if both comparison
    # matchers give the same verdict, otherwise the number of them (1 or 2)
    # that differ.  When is_ipv6 disagrees with $bool, prints a FAILED
    # message on its own line.
    my ($bool, $addr) = @_;
    my $verdict = is_ipv6($addr);

    if ($verdict != $bool) {
        my $negation = $bool ? " " : " not ";
        print "\nFAILED: $addr should" . $negation . "be valid\n";
    }
    else {
        # scalar() forces a plain true/false result from each matcher;
        # in list context they would return their capture groups instead.
        my $differing = grep { $verdict != $_ }
            scalar(other_is_ipv6($addr)),
            scalar(other2_is_ipv6($addr));
        my $mark = ($differing != 0) ? "$differing" : ".";
        print $mark;
    }
}
# Basic cases: empty string, plain IPv4, full-form addresses, surrounding
# whitespace, and malformed input.  Each entry is [expected, address].
for my $case (
    [!1, ""],
    [1,  "1.2.3.4"],
    [!1, "1.2.3"],
    [1,  "2001:0000:1234:0000:0000:C1C0:ABCD:0876"],
    [1,  "3ffe:0b00:0000:0000:0001:0000:0000:000a"],
    [1,  "FF02:0000:0000:0000:0000:0000:0000:0001"],
    [1,  "0000:0000:0000:0000:0000:0000:0000:0001"],
    [1,  "0000:0000:0000:0000:0000:0000:0000:0000"],
    [1,  "::ffff:192.168.1.26"],
    [!1, "02001:0000:1234:0000:0000:C1C0:ABCD:0876"],    # extra 0 not allowed!
    [!1, "2001:0000:1234:0000:00001:C1C0:ABCD:0876"],    # extra 0 not allowed!
    [1,  " 2001:0000:1234:0000:0000:C1C0:ABCD:0876"],    # leading space
    [1,  " 2001:0000:1234:0000:0000:C1C0:ABCD:0876 "],   # leading and trailing space
    [!1, " 2001:0000:1234:0000:0000:C1C0:ABCD:0876 0"],  # junk after the address
    [!1, "2001:0000:1234: 0000:0000:C1C0:ABCD:0876"],    # space inside the address
    [!1, "3ffe:0b00:0000:0001:0000:0000:000a"],          # only seven groups
    [!1, "FF02:0000:0000:0000:0000:0000:0000:0000:0001"],# nine groups
    [!1, "3ffe:b00::1::a"],                              # "::" used twice
) {
    XTEST(@{$case});
}
# "::" compression at every position: in the middle, at the start and at
# the end.  Each entry is [expected, address].
for my $case (
    [1, "2::10"],
    [1, "ff02::1"],
    [1, "fe80::"],
    [1, "2002::"],
    [1, "2001:db8::"],
    [1, "2001:0db8:1234::"],
    [1, "::ffff:0:0"],
    [1, "::1"],
    [1, "::ffff:192.168.1.1"],
    # one group after the "::", shrinking the leading run
    [1, "1:2:3:4:5:6:7:8"],
    [1, "1:2:3:4:5:6::8"],
    [1, "1:2:3:4:5::8"],
    [1, "1:2:3:4::8"],
    [1, "1:2:3::8"],
    [1, "1:2::8"],
    [1, "1::8"],
    # one group before the "::", shrinking the trailing run
    [1, "1::2:3:4:5:6:7"],
    [1, "1::2:3:4:5:6"],
    [1, "1::2:3:4:5"],
    [1, "1::2:3:4"],
    [1, "1::2:3"],
    [1, "1::8"],
    # leading "::"
    [1, "::2:3:4:5:6:7:8"],
    [1, "::2:3:4:5:6:7"],
    [1, "::2:3:4:5:6"],
    [1, "::2:3:4:5"],
    [1, "::2:3:4"],
    [1, "::2:3"],
    [1, "::8"],
    # trailing "::"
    [1, "1:2:3:4:5:6::"],
    [1, "1:2:3:4:5::"],
    [1, "1:2:3:4::"],
    [1, "1:2:3::"],
    [1, "1:2::"],
    [1, "1::"],
    # two groups after the "::", plus two invalid variants
    [1, "1:2:3:4:5::7:8"],
    [0, "1:2:3::4:5::7:8"],
    [0, "12345::6:7:8"],
    [1, "1:2:3:4::7:8"],
    [1, "1:2:3::7:8"],
    [1, "1:2::7:8"],
    [1, "1::7:8"],
) {
    XTEST(@{$case});
}
# Valid addresses that end in an IPv4 dotted quad.
for my $good_address (
    "1:2:3:4:5:6:1.2.3.4",
    "1:2:3:4:5::1.2.3.4",
    "1:2:3:4::1.2.3.4",
    "1:2:3::1.2.3.4",
    "1:2::1.2.3.4",
    "1::1.2.3.4",
    "1:2:3:4::5:1.2.3.4",
    "1:2:3::5:1.2.3.4",
    "1:2::5:1.2.3.4",
    "1::5:1.2.3.4",
    "1::5:11.22.33.44",
) {
    XTEST(1, $good_address);
}
# Invalid addresses: every out-of-range dotted quad below is tried after
# each of three IPv6 prefixes, prefix by prefix, in the order listed.
for my $prefix ("1::5:", "1::", "::") {
    for my $bad_quad (
        "400.2.3.4",
        "260.2.3.4",
        "256.2.3.4",
        "1.256.3.4",
        "1.2.256.4",
        "1.2.3.256",
        "300.2.3.4",
        "1.300.3.4",
        "1.2.300.4",
        "1.2.3.300",
        "900.2.3.4",
        "1.900.3.4",
        "1.2.900.4",
        "1.2.3.900",
        "300.300.300.300",
        "3000.30.30.30",
    ) {
        XTEST(!1, $prefix . $bad_quad);
    }
}
# Address-type examples (unicast, multicast, loopback, IPv4-mapped, ...),
# plain IPv4, and a few invalid forms.  Each entry is [expected, address].
# Prefix-length ("/nn") cases are kept but disabled, as in the original list.
for my $case (
    [1,  "fe80::217:f2ff:254.7.237.98"],
    [1,  "fe80::217:f2ff:fe07:ed62"],
    [1,  "2001:DB8:0:0:8:800:200C:417A"],          # unicast, full
    [1,  "FF01:0:0:0:0:0:0:101"],                  # multicast, full
    [1,  "0:0:0:0:0:0:0:1"],                       # loopback, full
    [1,  "0:0:0:0:0:0:0:0"],                       # unspecified, full
    [1,  "2001:DB8::8:800:200C:417A"],             # unicast, compressed
    [1,  "FF01::101"],                             # multicast, compressed
    [1,  "::1"],                                   # loopback, compressed, non-routable
    [1,  "::"],                                    # unspecified, compressed, non-routable
    [1,  "0:0:0:0:0:0:13.1.68.3"],                 # IPv4-compatible IPv6 address, full, deprecated
    [1,  "0:0:0:0:0:FFFF:129.144.52.38"],          # IPv4-mapped IPv6 address, full
    [1,  "::13.1.68.3"],                           # IPv4-compatible IPv6 address, compressed, deprecated
    [1,  "::FFFF:129.144.52.38"],                  # IPv4-mapped IPv6 address, compressed
    # [1, "2001:0DB8:0000:CD30:0000:0000:0000:0000/60"], # full, with prefix
    # [1, "2001:0DB8::CD30:0:0:0:0/60"],           # compressed, with prefix
    # [1, "2001:0DB8:0:CD30::/60"],                # compressed, with prefix #2
    # [1, "::/128"],                               # compressed, unspecified address type, non-routable
    # [1, "::1/128"],                              # compressed, loopback address type, non-routable
    # [1, "FF00::/8"],                             # compressed, multicast address type
    # [1, "FE80::/10"],                            # compressed, link-local unicast, non-routable
    # [1, "FEC0::/10"],                            # compressed, site-local unicast, deprecated
    [1,  "127.0.0.1"],                             # standard IPv4, loopback, non-routable
    [1,  "0.0.0.0"],                               # standard IPv4, unspecified, non-routable
    [1,  "255.255.255.255"],                       # standard IPv4
    [!1, "300.0.0.0"],                             # standard IPv4, out of range
    # [!1, "124.15.6.89/60"],                      # standard IPv4, prefix not allowed
    [!1, "2001:DB8:0:0:8:800:200C:417A:221"],      # unicast, full, one group too many
    [!1, "FF01::101::2"],                          # multicast, "::" used twice
    [!1, ""],                                      # nothing
) {
    XTEST(@{$case});
}
# Different spellings of fe80::204:61ff:fe9d:f156 (full, shortened, and
# with a dotted quad at the end), followed by a few short forms.
for my $good_address (
    "fe80:0000:0000:0000:0204:61ff:fe9d:f156",
    "fe80:0:0:0:204:61ff:fe9d:f156",
    "fe80::204:61ff:fe9d:f156",
    "fe80:0000:0000:0000:0204:61ff:254.157.241.086",
    "fe80:0:0:0:204:61ff:254.157.241.86",
    "fe80::204:61ff:254.157.241.86",
    "::1",
    "fe80::",
    "fe80::1",
) {
    XTEST(1, $good_address);
}

# Finish the row of progress marks.
print "\n";
|
|
|