# $Id: Collection.pm,v 1.14 2003/08/13 14:05:22 gene Exp $ package URI::Collection; use strict; use vars qw($VERSION); $VERSION = '0.09_01'; use Carp; use Cwd; use File::Spec; use File::Find; use File::Path; use IO::String; use Config::IniFiles; use Netscape::Bookmarks; #------------------------------------ # Positions my $position = &_position_as_hashref; sub _position_as_hashref { my $r = {}; my $count = 0; for( qw( title url category add_date last_modified last_visit iconfile iconindex description alias_id baseurl modified ) ) { $r->{$_} = $count++; } return $r; } #------------------------------------- sub _debug { print @_, "\n" if shift->{debug}; } sub new { my ($class, %args) = @_; my $self = { debug => $args{debug} || 0, links => $args{links} || [], ignore_dups => $args{ignore_dups} || 0, #------------------------------------------ #cat_links => {} #------------------------------------------ cat_links => [ $position, # Hashref of positions {} # Key == categorie, value == arrayref of arrayrefs each with link data ] #------------------------------------------- }; bless $self, $class; $self->_init($args{links}) if $args{links}; return $self; } sub _init { my $self = shift; $self->{links} = [ $self->{links} ] if $self->{links} && !ref($self->{links}) && (-f $self->{links} || -d $self->{links}) ; for( @{ $self->{links} } ) { croak "$_ does not exist.\n" unless -d || -f; if (-d) { # Handle a M$ Windows Favorites directory tree. $self->_traverse($_); } elsif (-f) { # Handle a Netscape style bookmark file. $self->_parse_file($_); } } } sub as_bookmark_file { my ($self, %args) = @_; $self->_debug('entering as_bookmark_file'); # Make the top level bookmark category. my $top = Netscape::Bookmarks::Category->new({ folded => 0, title => __PACKAGE__ .' Bookmarks', add_date => time, description => 'Bookmarks generated by '. __PACKAGE__, id => '?', }); $self->_debug("Created $top"); # Declare a hash for storing the category objects. my %categories; # Make bookmark categories for the internal category paths. #----------------------------------- #for my $path (sort keys %{ $self->{cat_links} }) #----------------------------------- for my $path (sort keys %{ $self->{cat_links}->[1] }) #----------------------------------- { # Current category. my $category; # Make a bookmark category for each title in the category # path. # NOTE: This split means, "split on any forward slashes that # are not preceeded by a backslash." for my $title (split /(?new({ folded => 0, title => $title, add_date => time, description => '', id => '?', }); $self->_debug("Created unseen $categories{$title}"); $category->add($categories{$title}); } # "Increment" the current category with the one just seen. $category = $categories{$title}; } # Add links to the last seen category. #------------------------------------ #for (@{ $self->{cat_links}{$path} }) #------------------------------------ for( @{ $self->{cat_links}->[1]->{$path} }) #------------------------------------ { #------------------------------------ #$category->add(_make_bookmark( %$_ )); #$self->_debug('Added link for '. join ', ', @$_ ); #------------------------------------ $category->add(_make_bookmark( $_ )); $self->_debug('Added link for ' . join( ', ', map { !defined $_ ? 'undef' : $_ } @$_) ); #------------------------------------ } } # Save the bookmarks as a file, if told to. if ($args{save_as}) { open BOOKMARKS, "> $args{save_as}" or croak "Can't write $args{save_as} - $!\n"; print BOOKMARKS $top->as_string; close BOOKMARKS; # Return the bookmark filename. $self->_debug('exiting as_bookmark_file'); return $args{save_as}; } else { # Return the bookmark file contents. return $top->as_string; } } sub as_favorite_directory { my ($self, %args) = @_; my $favs; if ($args{save_as}) { # Create a top level directory for our Favorites. $favs = $args{save_as}; mkpath $favs; chdir $favs; $favs = getcwd; } else { # Initialize the favorites structure. $favs = {}; } # Build the Favorites tree with Internet Shortcut files. #---------------------------- #for my $path (keys %{ $self->{cat_links} }) #---------------------------- for my $path (keys %{ $self->{cat_links}->[1] }) #---------------------------- { #-------------------------- #if ($args{save_as}) #-------------------------- # Create and change dir if $path isn't empty string if ($args{save_as} && $path) #-------------------------- { mkpath $path; chdir $path; } # Add links to the path category. #---------------------------- #for (@{ $self->{cat_links}{$path} }) #---------------------------- for( @{ $self->{cat_links}->[1]->{$path} } ) #---------------------------- { #---------------- #my ($title, $url) = %$_; #my $obj = _make_favorite($title, $url); #---------------- # $_ is an arrayref with values of a link my $obj = _make_favorite( $_ ); #---------------- if ($args{save_as}) { #---------------------------- #$obj->WriteConfig("$title.url"); #---------------------------- my $title = $_->[ $self->{cat_links}->[0]->{title} ]; $obj->WriteConfig("$title.url"); # Set file times. my $atime = $_->[ $self->{cat_links}->[0]->{last_visit} ]; #Access time my $mtime = $_->[ $self->{cat_links}->[0]->{last_modified} ]; #Modification time utime $atime, $mtime, "$title.url"; #-------------------- } else { # Aha! The undocumented OutputConfig method is what I # want. Of course, it sucks and prints to STDOUT, # instead of returning a scalar. my $str; my $str_fh = IO::String->new($str); my $old_fh = select $str_fh; $obj->OutputConfig; select $old_fh if defined $old_fh; push @{ $favs->{$path} }, $str; } } # Change back to the top level path category directory. chdir $favs if $args{save_as}; } # Return the top level directory or favorites structure. return $favs; } #----------------------------- sub fetch { my ($self, %args) = @_; my %items; $self->_debug('entering fetch'); # Step through every link. while (my ($category, $links) = each %{ $self->{cat_links}->[1] } ) { $self->_debug("Cat: $category"); # Check if there area arguments or a matching category if ( !(keys %args) || !scalar(grep(defined, (values %args))) || _matches($category, $args{category}) ) { # Fetch an entire category's links. for (@$links) { if( !(keys %args) || ( _matches($category, $args{category}) && ( !($args{title} || $args{url}) || _matches($_, \%args) ) ) || ( !$args{category} && _matches($_, \%args) ) ) { $self->_debug("link: ", join(',', @$_ || () ) ); push @{ $items{$category} }, $_; } } } } $self->_debug('exiting fetch'); return [ $position, \%items ]; } sub set { my($self,$data) = @_; $self->{cat_links} = $data; } #----------------------------- # Return all similar links or categories. sub fetch_items { my ($self, %args) = @_; my %items; $self->_debug('entering fetch_items'); # Step through every link. #--------------------------- #while (my ($category, $links) = each %{ $self->{cat_links} }) #--------------------------- while (my ($category, $links) = each %{ $self->{cat_links}->[1] }) #--------------------------- { $self->_debug("Cat: $category"); # If we are given a title, url or matching category if ( !(keys %args) || $args{title} || $args{url} || _matches($category, $args{category}) ) { # Fetch an entire category's links. #$self->_debug("cat match: $category") if _matches($category, $args{category}); for (@$links) { #--------------------------- #my ($title, $url) = %$_; #--------------------------- my ($title, $url) = ( $_->[ $position->{title} ], $_->[ $position->{url} ] ); #--------------------------- # XXX Yikes. This condition is... giant! if( !(keys %args) || (_matches($category, $args{category}) && ( !($args{title} || $args{url}) || _matches($title, $args{title}) || _matches($url, $args{url}) ) ) || (!$args{category} && ( _matches($title, $args{title}) || _matches($url, $args{url}) ) ) ) { $self->_debug("link: $title => $url"); #--------------------------- #push @{ $items{$category} }, $_; #--------------------------- push @{ $items{$category} }, { $_->[ $position->{title} ] => $_->[ $position->{url} ] }; #--------------------------- } } } } $self->_debug('exiting fetch_items'); return \%items; } # Alias for fetch_items restricted to a single title/url pattern. sub is_item { my ($self, $pat) = @_; my $items = $self->fetch_items(title => $pat, url => $pat); return keys %$items > 0 ? $items : undef; } sub _matches { my ($string, $pattern) = @_; #------------------------------------------ #return $pattern && $string =~ /$pattern/i ; #------------------------------------------ return 0 if ! $pattern; return $string =~ /$pattern/i if !ref($string); # $string is an arrayref representing a link and $pattern is a hashref # with arguments passed to 'fetch' (hashref with regexps as values). # We return true if one regex matches. foreach (keys %$pattern) { my $link_item = $string->[ $position->{$_} ] ; if( exists $pattern->{$_} && $link_item ) { my $regexp = $pattern->{$_} ; return 1 if $link_item =~ /$regexp/i ; } } return 0; #------------------------------------------ } #------------------------------------------------- #sub _title_and_url #{ # my $link = shift; # my ($title, $url); # # if (ref $link eq 'Netscape::Bookmarks::Link') # { # ($title, $url) = ($link->title, $link->href); # } # elsif (ref $link eq 'Config::IniFiles') # { # ($title, $url) = ( # $link->val('Title', 'Title'), # $link->val('InternetShortcut', 'URL') # ); # } # # return $title => $url; #} #-------------------------------------------------- # Step over the Favorites directory and add the categories and links # to our internal categories and links structure. sub _traverse { my ($self, $dir) = @_; #--------------------------- my $dirq = quotemeta $dir; #--------------------------- $self->_debug("entering _traverse with $dir"); find ( sub { if (/^(.+?)\.url$/) { # The file name - sans extension - is the title. my $title = $1; # Remove the Favorites tree path from the category # name. # Set the top level category if we are there. #---------------------------- #(my $category = $File::Find::dir) =~ s/^$dir//; #$category = 'Favorites' unless $category; #---------------------------- (my $category = $File::Find::dir) =~ s/^$dirq//; $category = '.' unless $category; $self->_debug("\$File::Find::dir=$File::Find::dir ", "Cat: $category\n"); #---------------------------- # Convert the (platform dependent) path separators to # slashes. I.e., forward slashes in category names # are replaced with "back-slash escaped" forward # slashes ("\/"). $category = join '/', map { s!\/!\\/!g; $_ } grep { $_ } File::Spec->splitdir($category); # Create a Winblows url object. my $obj = Config::IniFiles->new( '-file' => "$title.url" ); #---------------------------- ## Add a title section to the url object. #$obj->AddSection('Title'); #$obj->newval('Title', 'Title', $title); # ## Add the category and link! # push @{ $self->{cat_links}{$category} }, { # _title_and_url($obj) #}; #$self->_debug($self->{cat_links}{$category}[-1]); #---------------------------- my ($last_visit,$last_modified,$add_date) = (stat("$title.url"))[8..10]; # Position here is important my $link = []; $link->[$_] = undef for ( 0..(scalar(keys %$position) - 1) ); $link->[ $position->{title} ] = $title; $link->[ $position->{url} ] = $obj->val('InternetShortcut','URL'); $link->[ $position->{category} ] = $category; $link->[ $position->{add_date} ] = $add_date; $link->[ $position->{last_modified} ] = $last_modified; $link->[ $position->{last_visit} ] = $last_visit; $link->[ $position->{iconfile} ] = $obj->val('InternetShortcut','IconFile'); $link->[ $position->{iconindex} ] = $obj->val('InternetShortcut','IconIndex'); $link->[ $position->{baseurl} ] = $obj->val('DEFAULT','BASEURL'); $link->[ $position->{modified} ] = $obj->val('InternetShortcut','Modified'); push @{ $self->{cat_links}->[1]->{$category} }, $link; $self->_debug("Category: $category Link: ", $self->{cat_links}->[1]->{$category}[-1]->[$position->{title}] ); #---------------------------- } }, $dir ); $self->_debug('exiting _traverse'); } # Parse the given bookmarks file into our internal categories and # links structure. sub _parse_file { my ($self, $file) = @_; $self->_debug("entering _parse_file with $file"); # Declare our categories list and current category title. my (@category, $category); # Define the last seen level as the top. #--------------------------------- #my $last_level = 0; #--------------------------------- my( $cat_level, $link_level ) = (0,0); #--------------------------------- # Define a Netscape bookmarks object. my $nb = Netscape::Bookmarks->new($file); $nb->recurse( sub { my ($obj, $level) = @_; $self->_debug(ref($obj), ': ', $obj->title, " @ $level"); (my $title = $obj->title) =~ s/[\\\/\:\*\?\"\<\>\|]/\./g; if ($obj->isa('Netscape::Bookmarks::Category')) { #Find the current / separated category name. #------------------------------------------------------- #if ($level > 0) #{ # if ($level <= $last_level) # { # # XXX splice would be more idiomatic... # pop @category for 1 .. $last_level - $level + 1; # } # # # Add the category title. # push @category, $obj->title; #} # ## Set the current category and level. ## NOTE that / is forced as the "path separator" here. #$category = join '/', @category; #$last_level = $level; #------------------------------------------------------ if ($level > 0) { if ($level <= $cat_level) { # XXX splice would be more idiomatic... pop @category for 1 .. $cat_level - $level + 1; } # Add the category title. push @category, $title; } # Set the current category and level. # NOTE that / is forced as the "path separator" here. $category = join '/', @category; $cat_level = $level; #------------------------------------------------------ } elsif ($obj->isa('Netscape::Bookmarks::Link')) { # Add the category and link to our internal structure. #-------------------------------- #push @{ $self->{cat_links}{$category} }, { # $obj->title => $obj->href #}; #-------------------------------- if( $level > 0 ) { if( $level <= $cat_level || $level < $link_level) { # XXX splice would be more idiomatic... pop @category for 1 .. $cat_level - $level + 1 ; # Set the current category and level. # NOTE that / is forced as the "path separator" here. $category = join '/', @category; $cat_level = $level - 1; } } $link_level = $level; # Position here is important my $link = []; $link->[$_] = undef for ( 0..(scalar(keys %$position) - 1) ); $link->[ $position->{title} ] = $title ; $link->[ $position->{url} ] = $obj->href; $link->[ $position->{category} ] = $category; $link->[ $position->{add_date} ] = $obj->add_date; $link->[ $position->{last_modified} ] = $obj->last_modified; $link->[ $position->{last_visit} ] = $obj->last_visit; $link->[ $position->{description} ] = $obj->description; $link->[ $position->{alias_id} ] = $obj->aliasid; push @{ $self->{cat_links}->[1]->{$category || '.'} }, $link; #-------------------------------- } } ); $self->_debug('exiting _parse_file'); } sub _make_favorite { #-------------------------------- #my ($title, $url) = @_; # ## Define an Internet Shortcut object based on the given Netscape ## bookmark object. #my $obj = Config::IniFiles->new; #$obj->AddSection('DEFAULT'); #$obj->newval('DEFAULT', 'BASEURL', $url); #$obj->AddSection('InternetShortcut'); #$obj->newval('InternetShortcut', 'URL', $url); # #return $obj; #-------------------------------- my $aref = shift; my $obj = Config::IniFiles->new; if( defined $aref->[ $position->{baseurl} ] ) { $obj->AddSection('DEFAULT'); $obj->newval('DEFAULT', 'BASEURL', $aref->[ $position->{baseurl} ]); } if( defined $aref->[ $position->{url} ] ) { $obj->AddSection('InternetShortcut') if !$obj->SectionExists('InternetShortcut'); $obj->newval('InternetShortcut', 'URL', $aref->[ $position->{url} ]); } if( defined $aref->[ $position->{last_modified} ] ) { $obj->AddSection('InternetShortcut') if !$obj->SectionExists('InternetShortcut'); $obj->newval('InternetShortcut', 'Modified', $aref->[ $position->{modified} ]); } if( defined $aref->[ $position->{iconfile} ] ) { $obj->AddSection('InternetShortcut') if !$obj->SectionExists('InternetShortcut'); $obj->newval('InternetShortcut', 'IconFile', $aref->[ $position->{iconfile} ]); } if( defined $aref->[ $position->{iconindex} ] ) { $obj->AddSection('InternetShortcut') if !$obj->SectionExists('InternetShortcut'); $obj->newval('InternetShortcut', 'IconIndex', $aref->[ $position->{iconindex} ]); } return $obj; #-------------------------------- } sub _make_bookmark { #------------------------------------ #my ($title, $href) = @_; ##warn "$title, $href\n"; # ## Define a Netscape bookmark link based on the given Internet ## Shortcut object. #return Netscape::Bookmarks::Link->new({ # TITLE => $title, # DESCRIPTION => '', # HREF => $href, # ADD_DATE => '', # LAST_VISIT => '', # LAST_MODIFIED => '', # ALIAS_ID => '', #}); #------------------------------------ my $aref = shift; return Netscape::Bookmarks::Link->new({ TITLE => $aref->[ $position->{title} ] || '', DESCRIPTION => $aref->[ $position->{description} ] || '', HREF => $aref->[ $position->{url} ] || '', ADD_DATE => $aref->[ $position->{add_date} ] || '', LAST_VISIT => $aref->[ $position->{last_visit} ] || '', LAST_MODIFIED => $aref->[ $position->{last_modified} ] || '', ALIAS_ID => $aref->[ $position->{alias_id} ] || '' }); #------------------------------------ } 1; __END__ =head1 NAME URI::Collection - Input and output link collections in different formats =head1 SYNOPSIS use URI::Collection; $c = URI::Collection->new; $c = URI::Collection->new(links => $bookmark_file); $c = URI::Collection->new(links => $favorite_directory); $c = URI::Collection->new( links => [ $bookmark_file, $favorite_directory ], ); $links = $c->fetch_items( category => $regexp_1, title => $regexp_2, url => $regexp_3, ); if ($items = $c->is_item($regexp)) { print Data::Dumper($items); } $bookmarks = $c->as_bookmark_file; $c->as_bookmark_file(save_as => $filename); $favorites = $c->as_favorite_directory; $c->as_favorite_directory(save_as => $directory); =head1 DESCRIPTION An object of class C represents the links and categories in parsed Netscape style bookmark files and Windows "Favorites" directories, with output in either style. =head1 METHODS =head2 new $c = URI::Collection->new(links => $bookmark_file); $c = URI::Collection->new(links => $favorite_directory); $c = URI::Collection->new( links => [ $bookmark_file, $favorite_directory ], ); Where C may be a Netscape bookmark file name, a Windows favorite directory path or an arrayref of any amount of both. Return a new C object after processing the specified Netscape bookmark files and specified Windows .url files. B On Windows OSes, bookmarks are saved as files (one file per bookmark) with extension I<.url>. A I<.url> file is a plain text file with the same structure as Windows C<.ini> files, and may be processed with the C module. B: On the Netscape browser (and Mozilla too), bookmarks are saved as links in an html page that is maintained by the browser. If no arguments are passed, this method returns an empty C object. This method mashes link store formats together, simultaneously. It creates an internal data structure that is the same without worrying what kind of argument(s) is(are) specified. =head2 as_bookmark_file $bookmarks = $c->as_bookmark_file; $c->as_bookmark_file(save_as => $filename); Without an argument this method returns a Netscape style bookmark file as a string with the current bookmark as file contents. With an argument, save the bookmarks to disk as a Netscape style bookmark file called C. Note that this method lets you convert Windows style C<.url> bookmarks to a Netscape style bookmark file. =head2 as_favorite_directory $favorites = $c->as_favorite_directory; $c->as_favorite_directory(save_as => $directory); Without argument returns a M$ Windows "Favorites" tree as a hash reference, where the keys are the subdirectories (categories) and the values are array references with the contents of M$ Windows C<*.url> files as string elements. B In Netscape bookmark files you may group related bookmarks inside what is called a 'category'. Netscape categories may be nested inside other categories. With Windows Favorites, categories are groups of related C<.url> files inside directories. For the purposes of this documentaion and module, we refer to both collectivelly as categories. This tree is one dimensional. That is, the nested categories are represented (as hashref keys) by "slash separated paths" (without worrying if it comes from a Netscape category or a Windows Favorite subdirectory). An example will elucidate: { 'foo' => [ $link1, $link2 ], 'foo/bar' => [ $link3, $link4 ], 'baz' => [ $link5 ], 'baz/x/y' => [ $link6, $link7 ] } Here C<$link1, $link2, ...> and so are strings, each with the content of a Windows C<.url> file, without worring if the link comes from a Windows C<.url> file or a Netscape bookmark. For documentation about the format of a Windows C<.url> file see L. With argument, this method will create a Windows like directory hierarchy and fill it with Windows style, C<.url> file bookmarks. It is assumed that the value of C is the root path of the directory tree to be created. Note that this function lets you convert a Netscape style bookmark file to a Windows style C<.url> file directory tree. =head2 fetch_items $items = $c->fetch_items( category => $regexp_1, title => $regexp_2, url => $regexp_3, ); Returns links that have titles, urls or categories that match the given regular expressions. Returned value is a hashref with this format: { 'name/of/category' => { title_of_item => url_of_item }, ... } Note that if a category argument is supplied, only links under matching categories will be found. If no category argument is provided, any link with a matching title or url will be returned. If no arguments are provided, all links are returned. =head2 is_item $items = $c->is_item($regexp); Return the items whose titles or urls match the given regular expression. Note that this method is just C with the no category argument and identical title and url pattern. =head2 fetch $data_struct = $c->fetch( title => $regexp_1, url => $regexp_2, category => $regexp_3, add_date => $regexp_4, last_modified => $regexp_5, last_visit => $regexp_6, iconfile => $regexp_7, iconindex => $regexp_8, description => $regexp_9, alias_id => $regexp_10, baseurl => $regexp_11, modified => $regexp_12 ); Allows you to select items that match one or more regular expressions. Note that if a C argument is supplied, only links under matching categories will be found. If no category argument is provided, any item matching at least one regexp will be returned. All arguments are optional. If no arguments are specified, all link items are returned (i.e. no selection is done). The data structure that is returned has this format: [ { add_date => 3, alias_id => 9, baseurl => 10, category => 2, description => 8, iconfile => 6, iconindex => 7, last_modified => 4, last_visit => 5, modified => 11, title => 0, url => 1, }, { 'name/of/category' => [ [ ... ], # <-- (A) ... ], ... } ] Where each C element is an arrayref of data sorted as specified by the first hashref. For example, if you have to get 'iconfile' of 1st C element of category 'category_1', you have to write: $data_struct->[1]->{'category_1'}->[0]->[ $data_struct->[0]->{iconfile} ] XXX Which is some heinous syntax. -gb Another example ('category_1', 2nd C element, 'iconfile'): $data_struct->[1]->{'category_1'}->[1]->[ $data_struct->[0]->{iconfile} ] Please, don't assume C<$data_struct->[0]->{iconfile}> is 6, because this may change in future releases. Also, take into account that C<'name/of/category'> is C<'.'> for root category. =head2 set $c->set( $data_struct ); Sets the internal data structure of an C object (C<$c>). =head1 DEPENDENCIES L L L L L L L L =head1 TO DO Ignore redundant links. Optionally return the M$ Favorites directory structure (as a variable) instead of writing it to disk. Allow input/output of file and directory handles. Allow slicing of the category-links structure. Allow link munging to happen under a given category or categories only. Check if links are active. Update link titles and URLs if changed or moved. Mirror links? Handle other bookmark formats (including some type of generic XML), and "raw" (CSV) lists of links, to justify such a generic package name. This includes different platform flavors of every browser. Move the Favorites input/output functionality to a seperate module like C and C, or some such. Do the same with the above mentioned "platform flavors", such as Opera and Mosaic "Hotlists", and OmniWeb bookmarks, etc. =head1 SEE ALSO C There are an enormous number of web-based bookmark managers out there (see http://useful.webwizards.net/wbbm.htm), which I don't care about at all. What I do care about are multi-format link converters. Here are a few that I found: Online manager: C CDML Universal Bookmark Manager (for M$ Windows only): C OperaHotlist2HTML: C bk2site: C Windows favorites convertor: C bookmarker: C Columbine Bookmark Merge: C XBEL Bookmarks Editor: C And here are similar perl modules: L L L L =head1 THANK YOU Thank you to #perl for answering my random questions about this. :) Thank you to Enrique for working this into something more useful. =head1 AUTHORS Gene Boggs Egene@cpan.orgE Enrique Castilla Eecastillacontreras@yahoo.esE =head1 COPYRIGHT AND LICENSE Copyright 2003-2005 by Gene Boggs This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut