#!/usr/bin/perl

###########################################################################
#
#  mailbrowse.pl
#  $Id: mailbrowse.pl,v 1.6 1999/10/26 01:38:06 pozar Exp $
#
#  Grabs the first line in the body of the message for a URL and runs LYNX
#  to get the web page.  Cleans up the text a bit and sends it back to the
#  sender of the original mail.
#
#  Code may be ugly as it is an afternoon hack.  If you have suggestions
#  please send them to pozar@lns.com.
# 
#  Copyright 1999 - Timothy Milan Pozar - pozar@lns.com
#
###########################################################################

# ---------------------------------------------------------------------------
# Message scan.
#
# Reads the mail message from STDIN and sets two globals for the code below:
#
#   $requestor - reply address taken from a "From: " header line.  It is
#                left empty when no acceptable address was found.
#   $url       - first token of the first line of the body, or the sentinel
#                "EMPTY" when the message has no body at all.
#
# Every input line is copied to the DEBUG handle.  Reading stops as soon as
# an http:// URL has been found.
# ---------------------------------------------------------------------------

$inbody = 0;		# set to 1 once the blank header/body separator is seen
$url = "EMPTY";		# sentinel until a body line has been examined
$lastlineempty = 1;	# used by the page-formatting loop further down
$requestor = "";

my $body_line_seen = 0;	# true after the first body line has been examined

# You may want to touch these files before you run this program...
open (LOG, ">>", "/var/log/mailbrowse.log") || die "can't open mailbrowse";
open (DEBUG, ">>", "/tmp/mailbrowse.debug") || die "can't open mailbrowsedebug";

while (defined(my $line = <STDIN>)) {

	print DEBUG $line;

	# Strip the line terminator only.  A blind chop would eat a real
	# character from a final line that has no newline, and would leave
	# the "\r" of CRLF mail behind so the blank separator never matched.
	$line =~ s/\r?\n\z//;

	if ($inbody) {
		# Only the first body line is a URL candidate.  Later body
		# lines are logged above but otherwise ignored; in particular
		# a body line starting "From: " must not replace $requestor.
		next if $body_line_seen;
		$body_line_seen = 1;

		# Find the URL: everything up to the first whitespace, "!"
		# or ";".  The match cannot fail, so $url is always defined.
		($url) = $line =~ /\A([^\s!;]*)/;
		if ($url =~ m{^http://}) {
			print DEBUG "*** Found '$url' to get. ***\n";
			last;
		}
		next;
	}

	if ($line eq "") {
		# Blank line: end of headers.
		$inbody = 1;
		next;
	}

	if ($line =~ m/^From: /) {
		# Find the return address.  Prefer the part inside <...>;
		# otherwise take the first word containing an "@".
		my $from = substr($line, 6);
		my $candidate = "";
		if ($from =~ /<([^<>\s]*)>/) {
			$candidate = $1;
		} elsif ($from =~ /(\S+\@\S+)/) {
			$candidate = $1;
		}

		# The address comes from untrusted input and ends up on a
		# mail command line, so accept only a conservative character
		# set and never a leading "-" (it would look like an option).
		# A bare local user name (no "\@host") is still allowed.
		if ($candidate =~ /\A[A-Za-z0-9_][A-Za-z0-9._%+=-]*(?:\@[A-Za-z0-9][A-Za-z0-9.-]*)?\z/) {
			$requestor = $candidate;
			print DEBUG "*** Found '$requestor' to send to. ***\n";
		} else {
			print DEBUG "*** Ignoring unusable From address '$candidate'. ***\n";
		}
	}
}

# Without a reply address there is nobody to answer: record the failure in
# the log, repeat it on stdout, and leave with a non-zero status so that the
# mailer that invoked us has a chance to notice.
if (!length($requestor)) {
	chop($date = `date`);
	print LOG "$date: Could not get a valid email address\n";
	print STDOUT "$date: mailbrowse: Could not get a valid email address\n";
	exit(1);
}

# ---------------------------------------------------------------------------
# URL validation, page retrieval and reply.
#
# Expects $url and $requestor to have been set by the message scan above and
# the LOG handle to be open.  Both values come from the incoming mail and are
# untrusted, so every external program is started with a list-form pipe
# open: the arguments are handed to the program directly and no shell ever
# parses them.
# ---------------------------------------------------------------------------

# Append one time-stamped line to the LOG handle.
sub mb_log {
	my ($message) = @_;
	my $date = `date`;
	chomp($date);
	print LOG "$date: $message\n";
}

# Pipe $text to /usr/bin/mail with the given subject and recipient.
# Returns 1 when mail was started, 0 when the recipient was refused.
sub mb_send_mail {
	my ($subject, $recipient, $text) = @_;

	# A recipient starting with "-" would be read by mail as an option.
	if (!defined($recipient) || $recipient =~ /^-/) {
		mb_log("Refusing to mail to a recipient that looks like an option");
		return 0;
	}

	open (MAILPAGE, "|-", "/usr/bin/mail", "-s", $subject, $recipient) || die "can't open mailpage";
	print MAILPAGE $text;
	# close() waits for mail to finish and reports its exit status.
	close (MAILPAGE) || mb_log("mail to $recipient failed (status $?)");
	return 1;
}

# Only support http requests.  We don't want to be serving back "file:"
# requests like /etc/passwd...
#
# The test is made against the raw $url.  Running it against a quotemeta'd
# copy ("\Q$url") can never succeed, because quotemeta turns "http://" into
# "http\:\/\/".

if (!defined($url) || $url !~ m{^http://}) {
	my $shown = defined($url) ? $url : "";
	mb_log("Could not get a valid web url for $requestor");
	mb_send_mail("mailbrowse: bad URL '$shown'", $requestor,
		"mailbrowse can't figure out what URL you want.  Use \"http://site/directory\" format.\n");
	exit 0;
}

$n = 0;			# number of parts mailed so far
$newpage = "";		# text collected for the part being built
$lastlineempty = 1;	# 1 while the previous kept line was blank (or none yet)

# SkyTel pagers (Glenayre) have a 10,000 char limit per page. Break
# apart long web pages into no more than 3 parts.
my $part_limit = 9700;	# leaves some headroom below the 10,000 limit
my $max_parts = 3;

# Use lynx to retrieve and format the page a bit.  Tell lynx with the "-width"
# argument that you have a really wide terminal so the pager and we will do
# the word wrapping.
# I am using Lynx Version 2.6
# <URL:http://www.nyu.edu/pages/wsn/subir/lynx.html>

open (PAGE, "-|", "/usr/local/bin/lynx", "-dump", "-width=1000", $url) || die "can't open page";
while (defined(my $line = <PAGE>)) {
	chomp($line);
	$line =~ tr[\200-\377][\000-\177];	# Strip 8th bit
	$line =~ tr/ //s;		# compress a string of spaces to one.
	$line =~ tr/-//s;		# compress a string of hyphens to one.
	$line =~ tr/_//s;		# compress a string of underbars to one.
	$line =~ s/^ //;		# strip out leading spaces on a line
	$line =~ s/^\^$//;		# strip lines with just a caret.
					# (Associated Press articles have these.)
	$line =~ s/\[INLINE\]//g;	# strip out this string
	$line =~ s/\[USEMAP\]//g;	# strip out this string
	$line =~ s/\[LINK\]//g;		# strip out this string

	# Decide what, if anything, this line adds to the current part.
	my $chunk;
	if ($line ne "") {
		$chunk = $line . "\n";
		$lastlineempty = 0;
	} elsif ($lastlineempty == 0) {
		$chunk = "\n";
		# don't repeat empty lines.
		$lastlineempty = 1;
	} else {
		next;
	}

	# Mail the current part BEFORE it would grow past the limit.
	# Testing after the append lets a part overshoot by a whole line,
	# and with -width=1000 that can be about 1000 characters.
	# NOTE(review): a single line longer than $part_limit would still go
	# out oversized; lynx's 1000-column width should prevent that.
	if (length($newpage) > 0 && length($newpage) + length($chunk) > $part_limit) {
		$n = $n + 1;
		mb_send_mail("$url part $n", $requestor, $newpage);
		$newpage = "";
		if ($n >= $max_parts) {
			last;
		}
	}

	$newpage = $newpage . $chunk;
}
# Reap lynx.  After an early "last" it may have died of SIGPIPE, so the
# status is deliberately ignored.
close (PAGE);

# Whatever is left over becomes the final part.  $newpage is empty here when
# the loop stopped because the part limit was reached.
if (length($newpage) > 0) {
	$n = $n + 1;
	mb_send_mail("$url part $n", $requestor, $newpage);
}
mb_log("$requestor wants $url");

exit 0;
