#!/usr/bin/perl -w use strict; $^W = 1; use Socket qw (inet_aton inet_ntoa); my $DEBUG = 0; my %range; my $range_count = 0; my $log2 = log(2); my @mask; my @mask_packed; my %mask_decimal; for (my $i=0; $i<=31; $i++){ $mask[$i] = pack('B32', ('1'x(32-$i)).('0'x$i)); $mask_packed[$i] = pack('C',$i); $mask_decimal{pack('C',$i)} = $i; } my $ip_match = qr/^(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(\d|[01]?\d\d|2[0-4]\d|25[0-5])$/o; my $reg_dir = './'; # 'SPECIAL' IP RANGES (all from RFC3330) # a double asterix '**' indicates a local (non-public) IP ranges, # whereas a double dash '--' indicates some other IANA range # 'Blanket' coverage, with high handicap (ensure complete coverage) # note that the handicap is less than the default, to ensure that # no part is overwritten by RIPE database, which contains IANA ranges insert_raw(unpack('N',inet_aton('0.0.0.0')),2**32,'--',2**31); # "This" Network [RFC1700, page 4] insert_raw(unpack('N',inet_aton('0.0.0.0')),2**24,'**',2**24); # Private-Use Networks [RFC1918] insert_raw(unpack('N',inet_aton('10.0.0.0')),2**24,'**',2**24); # Public Data Networks [RFC1700, page 181] insert_raw(unpack('N',inet_aton('14.0.0.0')),2**24,'--',2**24); # Loopback [RFC1700, page 5] insert_raw(unpack('N',inet_aton('127.0.0.0')),2**24,'**',2**24); # Link Local insert_raw(unpack('N',inet_aton('169.254.0.0')),2**16,'**',2**16); # Private-Use Networks [RFC1918] insert_raw(unpack('N',inet_aton('172.16.0.0')),2**20,'**',2**20); # Test-Net insert_raw(unpack('N',inet_aton('192.0.2.0')),2**8,'**',2**8); # 6to4 Relay Anycast [RFC3068] insert_raw(unpack('N',inet_aton('192.88.99.0')),2**8,'**',2**8); # Private-Use Networks [RFC1918] insert_raw(unpack('N',inet_aton('192.168.0.0')),2**16,'**',2**16); # Network Interconnect Device Benchmark Testing [RFC2544] insert_raw(unpack('N',inet_aton('198.18.0.0')),2**17,'--',2**17); # Multicast [RFC3171] insert_raw(unpack('N',inet_aton('224.0.0.0')),2**28,'--',2**28); # Reserved for Future Use [RFC1700, page 4] insert_raw(unpack('N',inet_aton('240.0.0.0')),2**28,'--',2**28); read_reg('delegated-afrinic-latest'); read_reg('delegated-lacnic-latest'); read_reg('delegated-apnic-latest'); read_ripe(); read_reg('delegated-arin-latest'); join_neighbours(); punch_holes(); optimize(); output(); sub output { open(OUTFILE,"> sorted_countries.txt") or die ($!); foreach my $key (sort keys %range){ print OUTFILE inet_ntoa(substr($key,0,4)) . '|'; print OUTFILE 2 ** unpack('C',substr($key,4,1)) .'|'; print OUTFILE $range{$key}->{cc} ."\n"; } close OUTFILE; } sub formatRange { my ($start,$end,$cc) = @_; my $ip = pack('N',$start); my $size = ($end - $start) + 1; while (1){ my $mask = int(log($size)/log(2)); my $max_mask = get_max_mask($ip); if ($max_mask < $mask){ $mask = $max_mask; } print OUTFILE inet_ntoa($ip).'|'. 2 ** $mask .'|'. $cc ."\n"; $size = $size - (2 ** $mask); return unless ($size > 0); $ip = pack('N',(unpack('N',$ip) + 2 ** $mask)); } } sub optimize { print STDERR "performing final optimizations\n"; my $repeat = 1; while ($repeat){ $repeat = 0; my @key = sort keys %range; my $one = $key[0]; for (my $i = 1; $i<=$#key; $i++){ my $two = $key[$i]; if (exists $range{$one}){ my $one_mask_decimal = $mask_decimal{substr($one,4,1)}; my $two_mask_decimal = $mask_decimal{substr($two,4,1)}; if (($one_mask_decimal == $two_mask_decimal) && ($range{$one}->{cc} eq $range{$two}->{cc})){ my $one_ip_packed = substr($one,0,4); my $two_ip_packed = substr($two,0,4); if (($one_ip_packed & $mask[$one_mask_decimal + 1]) eq ($two_ip_packed & $mask[$two_mask_decimal + 1])){ my $one_ip_decimal = unpack('N',substr($one,0,4)); my $two_ip_decimal = unpack('N',substr($two,0,4)); insert_raw($one_ip_decimal, 2 ** ($one_mask_decimal + 1), $range{$one}->{cc}, $range{$one}->{handicap}); delete $range{$one}; delete $range{$two}; $repeat++; } } } $one = $two; } print STDERR " repeating ($repeat joins)\n" if $repeat; } } sub join_neighbours { print STDERR "optimizing by joining neighbouring ranges\n"; my $repeat = 1; while ($repeat){ $repeat = 0; my @key = sort keys %range; my $one = $key[0]; for (my $i = 1; $i<=$#key; $i++){ my $two = $key[$i]; if (exists $range{$one}){ my $one_mask_decimal = $mask_decimal{substr($one,4,1)}; my $two_mask_decimal = $mask_decimal{substr($two,4,1)}; if (($one_mask_decimal == $two_mask_decimal) && ($range{$one}->{cc} eq $range{$two}->{cc}) && ($range{$one}->{handicap} eq $range{$two}->{handicap})){ my $one_ip_packed = substr($one,0,4); my $two_ip_packed = substr($two,0,4); if (($one_ip_packed & $mask[$one_mask_decimal + 1]) eq ($two_ip_packed & $mask[$two_mask_decimal + 1])){ my $one_ip_decimal = unpack('N',substr($one,0,4)); my $two_ip_decimal = unpack('N',substr($two,0,4)); insert_raw($one_ip_decimal, 2 ** ($one_mask_decimal + 1), $range{$one}->{cc}, $range{$one}->{handicap}); delete $range{$one}; delete $range{$two}; $repeat++; } } } $one = $two; } print STDERR " repeating ($repeat joins)\n" if $repeat; } } sub punch_holes { print STDERR "removing overlapping ranges\n"; my $repeat = 1; while ($repeat) { $repeat = 0; foreach my $one (keys %range){ next unless (exists $range{$one}); # we're deleting, so need to be careful if (defined (my $two = get_existing_range($one))){ $repeat++; if ($DEBUG){ print STDERR ">>>>MATCH: ". inet_ntoa(substr($one,0,4)).'/'.unpack('C',substr($one,4,1)); print STDERR " and ". inet_ntoa(substr($two,0,4)).'/'.unpack('C',substr($two,4,1)); print STDERR "\n"; } my $one_mask_decimal = $mask_decimal{substr($one,4,1)}; my $two_mask_decimal = $mask_decimal{substr($two,4,1)}; if ($one_mask_decimal > $two_mask_decimal){ punch_hole($one,$two); } else { punch_hole($two,$one); } } } print STDERR " repeating ($repeat overlapping ranges)\n" if $repeat; } } sub punch_hole { my ($larger,$smaller) = @_; my $larger_mask_decimal = $mask_decimal{substr($larger,4,1)}; my $smaller_mask_decimal = $mask_decimal{substr($smaller,4,1)}; my $larger_handicap = $range{$larger}->{handicap}; my $smaller_handicap = $range{$smaller}->{handicap}; if ($larger_handicap <= $smaller_handicap){ # $larger is less handicapped, therefore $smaller # is deleted if ($DEBUG){ print STDERR ">>>>removing ". inet_ntoa(substr($smaller,0,4)).'/'.unpack('C',substr($smaller,4,1)); print STDERR " in favour of ". inet_ntoa(substr($larger,0,4)).'/'.unpack('C',substr($larger,4,1)); print STDERR "\n"; } delete $range{$smaller}; $range_count--; } else { # $smaller is less handicapped, therefore a hole # should be cut for it in $larger my $larger_cc = $range{$larger}->{cc}; if ($DEBUG){ print STDERR ">>>>deleting: ". inet_ntoa(substr($larger,0,4)).'/'.unpack('C',substr($larger,4,1)); print STDERR "\n"; } delete $range{$larger}; $range_count--; my $larger_ip_packed = substr($larger,0,4); my $larger_ip_decimal_start = unpack('N',$larger_ip_packed); my $smaller_ip_packed = substr($smaller,0,4); my $smaller_ip_decimal_start = unpack('N',$smaller_ip_packed); if ($larger_ip_decimal_start < $smaller_ip_decimal_start){ if ($DEBUG){ print STDERR ">>>>creating: ". inet_ntoa(pack('N',$larger_ip_decimal_start)).'/'. int(log($smaller_ip_decimal_start-$larger_ip_decimal_start)/$log2); print STDERR "\n"; } insert_raw($larger_ip_decimal_start,$smaller_ip_decimal_start-$larger_ip_decimal_start,$larger_cc,$larger_handicap); } my $larger_ip_decimal_end = $larger_ip_decimal_start + (2 ** $larger_mask_decimal) - 1; my $smaller_ip_decimal_end = $smaller_ip_decimal_start + (2 ** $smaller_mask_decimal) - 1; if ($larger_ip_decimal_end > $smaller_ip_decimal_end){ if ($DEBUG){ print STDERR ">>>>creating: ". inet_ntoa(pack('N',$smaller_ip_decimal_end+1)).'/'. int(log($larger_ip_decimal_end-$smaller_ip_decimal_end)/$log2); print STDERR "\n"; } insert_raw($smaller_ip_decimal_end+1,$larger_ip_decimal_end-$smaller_ip_decimal_end,$larger_cc,$larger_handicap); } } } sub get_existing_range { my $key = shift; my $ip_packed = substr($key,0,4); for (my $i = 31; $i>=0; $i--){ my $existing_key = ($ip_packed & $mask[$i]) . $mask_packed[$i]; next if $existing_key eq $key; if (exists $range{$existing_key}){ return $existing_key; } } return undef; } sub insert_raw { my ($ip_decimal,$size,$cc,$handicap) = @_; while ($size > 0){ my $ip_packed = pack('N',$ip_decimal); my $max_mask = get_max_mask($ip_packed); if ((2 ** $max_mask) > $size){ $max_mask = int(log($size)/$log2); } add_range($ip_packed,$mask_packed[$max_mask],$cc,$handicap); $ip_decimal += (2 ** $max_mask); $size -= (2 ** $max_mask); } } sub add_range { my ($ip_packed, $mask_packed, $cc, $handicap) = @_; my $key = $ip_packed . $mask_packed; if (exists $range{$key}){ my $existing = $range{$key}; if ($existing->{handicap} > $handicap){ $range{$ip_packed . $mask_packed} = {cc => $cc, handicap => $handicap}; $range_count++; } } else { $range{$ip_packed . $mask_packed} = {cc => $cc, handicap => $handicap}; $range_count++; } } sub get_max_mask { my $ip_packed = shift; for (my $i = 31; $i>=0; $i--){ return $i if (($ip_packed | $mask[$i]) eq $mask[$i]); } die("strange IP: ". inet_ntoa($ip_packed)); } sub read_ripe { print STDERR "loading data from ripe.db.inetnum\n"; my $ripe_inet_line = qr/^inetnum:\s+(\S+)\s*-\s*(\S+)/o; my $ripe_cc_line = qr/^country:\s+(\S\S)/o; open (REG,"< $reg_dir/ripe.db.inetnum") or die("can't open $reg_dir/ripe.db.inetnum: $!"); binmode REG, ':crlf'; { my $start; my $end; my $cc; my $status; while (my $line = ){ if (defined $start){ next unless $line =~ $ripe_cc_line; $cc = uc $1; $cc = 'GB' if ($cc eq 'UK'); insert_raw($start,$end-$start+1,$cc,$end-$start+1); $start = undef; $end = undef; } elsif ($line =~ $ripe_inet_line){ my ($a_start,$a_end) = ($1,$2); if ($a_start =~ $ip_match){ $start = ($1 * 16777216) + ($2 * 65536) + ($3 * 256) + $4; } if ($a_end =~ $ip_match){ $end = ($1 * 16777216) + ($2 * 65536) + ($3 * 256) + $4; } die($line) unless ((defined $start) && (defined $end)); } else { } } } close REG || warn("can't close $reg_dir/ripe.db.inetnum, but continuing: $!"); } sub read_reg { my $path = shift; open (REG, "< $reg_dir/$path") || die("can't open $reg_dir/$path: $!"); binmode REG, ':crlf'; print STDERR "loading data from $path\n"; my $stat_line = qr/^([^\|]+)\|(..)\|ipv4\|([^\|]+)\|(\d+)\|/o; while (my $line = ){ chomp $line; next unless $line =~ $stat_line; my ($auth,$cc,$ip,$size) = ($1,uc $2,$3,$4); next unless ($ip =~ $ip_match); my $start = ($1 * 16777216) + ($2 * 65536) + ($3 * 256) + $4; $cc = 'GB' if ($cc eq 'UK'); insert_raw($start,$size,$cc,$size); } close REG || warn("can't close $reg_dir/$path, but continuing: $!"); }