12/22/00: Added & -> & matching pattern to cleanText() and cleanLink().
12/22/00: Added cleanText(). Changed local() calls to my(). 12/21/00: Improved link/url expression matching. 12/20/00: Added cleanLink(). 12/18/00: Initial coding.
This commit is contained in:
commit
92b57987d1
1 changed files with 167 additions and 0 deletions
167
bookmarks2opml.pl
Normal file
167
bookmarks2opml.pl
Normal file
|
@ -0,0 +1,167 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# $Id$
|
||||
#
|
||||
# File: bookmarks2opml.pl
|
||||
#
|
||||
# Function: Converts IE Bookmarks/Favorites from HTML to OPML
|
||||
#
|
||||
# Author(s): Erik C. Thauvin (erik@thauvin.net)
|
||||
#
|
||||
# Copyright: Copyright (C) 2000 Erik C. Thauvin
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Source: Started anew.
|
||||
#
|
||||
# Notes: Usage: bookmarks2opml.pl bookmarks.htm
|
||||
# --> bookmarks.opml
|
||||
#
|
||||
#
|
||||
# History:
|
||||
#
|
||||
# 12/22/00 ECT Added & -> & matching pattern to cleanText()
|
||||
# and cleanLink().
|
||||
# 12/22/00 ECT Added cleanText().
|
||||
# Changed local() calls to my().
|
||||
# 12/21/00 ECT Improved link/url expression matching.
|
||||
# 12/20/00 ECT Added cleanLink().
|
||||
# 12/18/00 ECT Initial coding.
|
||||
#
|
||||
#
|
||||
# Disclaimer:
|
||||
#
|
||||
# This software is provided "as is" without express or implied warranties.
|
||||
# Permission is granted to use, copy, modify and distribute this software,
|
||||
# provided this disclaimer and copyright are preserved on all copies. This
|
||||
# software may not, however, be sold or distributed for profit, or included
|
||||
# with other software which is sold or distributed for profit, without the
|
||||
# permission of the author.
|
||||
#
|
||||
|
||||
|
||||
# Declare local variables
|
||||
my($progname, $bookmarks, $title, $body, $opmlfile);
|
||||
|
||||
# Get the program's name
|
||||
($progname = $0) =~ s/.*\///;
|
||||
|
||||
# One argument is required, the name of the bookmarks file
|
||||
if (scalar @ARGV == 1)
|
||||
{
|
||||
$bookmarks = shift;
|
||||
}
|
||||
else
|
||||
{
|
||||
die "Usage: $progname <bookmarks-filename>\n";
|
||||
}
|
||||
|
||||
#
|
||||
# The Truth is Out There!
|
||||
#
|
||||
|
||||
# Open the bookmarks file
|
||||
open(F, "$bookmarks") || die "$progname: Could not open $bookmarks: $!\n";
|
||||
|
||||
# Read the bookmarks file
|
||||
while (<F>)
|
||||
{
|
||||
# Get the title
|
||||
if (/<TITLE>(.*)<\/TITLE>/i)
|
||||
{
|
||||
$title = $1;
|
||||
}
|
||||
# Get directory names
|
||||
elsif (/<DT><H3.*>(.*)<\/H3>/i)
|
||||
{
|
||||
# Build directory outline
|
||||
$body .= '<outline text="' . cleanText($1) . "\">\n";
|
||||
next;
|
||||
}
|
||||
# Get end of directory marker
|
||||
elsif (/\s<\/DL>/i)
|
||||
{
|
||||
# Close directory outline
|
||||
$body .= "<\/outline>\n";
|
||||
next;
|
||||
}
|
||||
# Get link reference
|
||||
elsif (/<DT><A HREF=\"(\S+)\" .*\">(.*)<\/A>/i)
|
||||
{
|
||||
# Build link outline
|
||||
$body .= '<outline text="' . cleanText($2) . '" type="link" url="' . cleanLink($1) . "\" \/>\n";
|
||||
next;
|
||||
}
|
||||
}
|
||||
|
||||
# Close bookmarks file
|
||||
close(F);
|
||||
|
||||
# Remove extra newline
|
||||
chomp($body);
|
||||
chomp($title);
|
||||
|
||||
# OPML output filename is based on bookmarks
|
||||
$opmlfile = $bookmarks;
|
||||
# Remove path, if any
|
||||
$opmlfile =~ s/.*\/(.*)/$1/;
|
||||
# Remove .???? suffix, if any
|
||||
$opmlfile =~ s/(.*)\..*/$1/;
|
||||
# Add .opml suffix
|
||||
$opmlfile .= '.opml';
|
||||
|
||||
# Open OPML output file
|
||||
open(F, ">$opmlfile") || die "$progname: Could not open $opmlfile: $!\n";
|
||||
|
||||
# Generate simple XML/OPML document
|
||||
print F <<EOT;
|
||||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<opml version="1.0">
|
||||
<head>
|
||||
<title>$title</title>
|
||||
</head>
|
||||
<body>
|
||||
$body
|
||||
</body>
|
||||
</opml>
|
||||
EOT
|
||||
|
||||
# Close OPML file
|
||||
close(F);
|
||||
|
||||
# Change OPML file type to Radio UserLand (MacPerl only)
|
||||
if ($MacPerl::Version)
|
||||
{
|
||||
MacPerl::SetFileInfo("Radu ", "OPML", $opmlfile);
|
||||
}
|
||||
|
||||
|
||||
#
|
||||
# URL-Encode problematic characters
|
||||
#
|
||||
sub cleanLink()
|
||||
{
|
||||
my($link) = @_;
|
||||
|
||||
$link =~ s/>/%3F/g;
|
||||
$link =~ s/</%3C/g;
|
||||
$link =~ s/'/%27/g;
|
||||
$link =~ s/"/%22/g;
|
||||
$link =~ s/&(?!(#[0-9]+|#x[0-9a-fA-F]+|\w+);)/&/g;
|
||||
|
||||
return $link
|
||||
}
|
||||
|
||||
#
|
||||
# HTML-Encode problematic characters
|
||||
#
|
||||
sub cleanText()
|
||||
{
|
||||
my($link) = @_;
|
||||
|
||||
$link =~ s/>/>/g;
|
||||
$link =~ s/</</g;
|
||||
$link =~ s/"/"/g;
|
||||
$link =~ s/&(?!(#[0-9]+|#x[0-9a-fA-F]+|\w+);)/&/g;
|
||||
|
||||
return $link
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue