commit 92b57987d149588cb8690d7ebb91ea2569464752 Author: Erik C. Thauvin Date: Fri Jul 26 04:16:54 2002 +0000 12/22/00: Added & -> & matching pattern to cleanText() and cleanLink(). 12/22/00: Added cleanText(). Changed local() calls to my(). 12/21/00: Improved link/url expression matching. 12/20/00: Added cleanLink(). 12/18/00: Initial coding. diff --git a/bookmarks2opml.pl b/bookmarks2opml.pl new file mode 100644 index 0000000..67dc161 --- /dev/null +++ b/bookmarks2opml.pl @@ -0,0 +1,167 @@ +#!/usr/bin/perl + +# $Id$ +# +# File: bookmarks2opml.pl +# +# Function: Converts IE Bookmarks/Favorites from HTML to OPML +# +# Author(s): Erik C. Thauvin (erik@thauvin.net) +# +# Copyright: Copyright (C) 2000 Erik C. Thauvin +# All Rights Reserved. +# +# Source: Started anew. +# +# Notes: Usage: bookmarks2opml.pl bookmarks.htm +# --> bookmarks.opml +# +# +# History: +# +# 12/22/00 ECT Added & -> & matching pattern to cleanText() +# and cleanLink(). +# 12/22/00 ECT Added cleanText(). +# Changed local() calls to my(). +# 12/21/00 ECT Improved link/url expression matching. +# 12/20/00 ECT Added cleanLink(). +# 12/18/00 ECT Initial coding. +# +# +# Disclaimer: +# +# This software is provided "as is" without express or implied warranties. +# Permission is granted to use, copy, modify and distribute this software, +# provided this disclaimer and copyright are preserved on all copies. This +# software may not, however, be sold or distributed for profit, or included +# with other software which is sold or distributed for profit, without the +# permission of the author. +# + + +# Declare local variables +my($progname, $bookmarks, $title, $body, $opmlfile); + +# Get the program's name +($progname = $0) =~ s/.*\///; + +# One argument is required, the name of the bookmarks file +if (scalar @ARGV == 1) +{ + $bookmarks = shift; +} +else +{ + die "Usage: $progname \n"; +} + +# +# The Truth is Out There! +# + +# Open the bookmarks file +open(F, "$bookmarks") || die "$progname: Could not open $bookmarks: $!\n"; + +# Read the bookmarks file +while () +{ + # Get the title + if (/(.*)<\/TITLE>/i) + { + $title = $1; + } + # Get directory names + elsif (/<DT><H3.*>(.*)<\/H3>/i) + { + # Build directory outline + $body .= '<outline text="' . cleanText($1) . "\">\n"; + next; + } + # Get end of directory marker + elsif (/\s<\/DL>/i) + { + # Close directory outline + $body .= "<\/outline>\n"; + next; + } + # Get link reference + elsif (/<DT><A HREF=\"(\S+)\" .*\">(.*)<\/A>/i) + { + # Build link outline + $body .= '<outline text="' . cleanText($2) . '" type="link" url="' . cleanLink($1) . "\" \/>\n"; + next; + } +} + +# Close bookmarks file +close(F); + +# Remove extra newline +chomp($body); +chomp($title); + +# OPML output filename is based on bookmarks +$opmlfile = $bookmarks; +# Remove path, if any +$opmlfile =~ s/.*\/(.*)/$1/; +# Remove .???? suffix, if any +$opmlfile =~ s/(.*)\..*/$1/; +# Add .opml suffix +$opmlfile .= '.opml'; + +# Open OPML output file +open(F, ">$opmlfile") || die "$progname: Could not open $opmlfile: $!\n"; + +# Generate simple XML/OPML document +print F <<EOT; +<?xml version="1.0" encoding="ISO-8859-1"?> +<opml version="1.0"> +<head> +<title>$title + + +$body + + +EOT + +# Close OPML file +close(F); + +# Change OPML file type to Radio UserLand (MacPerl only) +if ($MacPerl::Version) +{ + MacPerl::SetFileInfo("Radu ", "OPML", $opmlfile); +} + + +# +# URL-Encode problematic characters +# +sub cleanLink() +{ + my($link) = @_; + + $link =~ s/>/%3F/g; + $link =~ s//>/g; + $link =~ s/