#!/usr/bin/perl
#
#
# Program to convert accented letters to their html code
# equivalent or back. Option -h converts to html codes,
# option -a converts back to accented letters for easier
# reading. Non breaking spaces are unaffected. Default
# is to convert to html codes if no option specified.
#
# The pgm is called with the option -dv[a|h] and the name(s) of the
# data file. ie: htmlizer.pl -dh girardet.txt or *.html
# Output is returned in the same file. Lists are not processed; only
# 1 entry is permitted after the option field.
# 
#
# Rev 0.0 - Mar 20-2004 : Initial start
#     1.0 - Jun 03-2004 : added multiple files processing
#     1.1 - Jun 07-2004 : *.nnn did not expand under winbooze. changed to <glob>
#     1.2 - Jun 08-2004 : Added verbose option
#
#
$html = 0; #default mode To html mode.
if (@ARGV==0) {print "Help :\n"; &usage; exit(1);}  # Provide help if no args
elsif ($ARGV[0] =~ /^-/)
        {
         if ($ARGV[0] =~ /d/) {$debug=1; $ARGV[0]=$`.$'; print "Debug Mode\n";}
         if ($ARGV[0] =~ /v/) {$verbose=1; $ARGV[0]=$`.$'; print "Verbose Mode\n";}
         if ($ARGV[0] =~ /a/) {$html=1; $ARGV[0]=$`.$';}
         if ($ARGV[0] =~ /h/) {$html=0; $ARGV[0]=$`.$';}
         if ($ARGV[0] =~ /\w/) {print "Invalid Option: $ARGV[0]\n"; &usage;}
         shift(@ARGV); # discard no longer needed option field
         }
#
if ($html){print "To accent mode\n";}else{print "To html mode\n";}
#
if ($debug){print "Original file(s) will be kept with prefix: old_ \n"};
# 
if (@ARGV==0) {print "Missing Filename\n"; die "\n";} 
#
while(<$ARGV[0]>){
  $data=$_;
#
$newnam = "old_".$data;
rename($data, $newnam) || die "Can't rename $data file";
#
open (IN, $newnam);
open (OUT, ">".$data) || die "Can't open $data for writing";
if ($verbose){print "Processing file : $data - ";}
$count = 0;
#
while($_=<IN>){
  if (!$html){$_=&html($_);}else{$_=&accent($_);}
  print OUT $_; $count++;
                    }
if ($verbose){print "processed $count records\n";}
close OUT; close IN;
if (!$debug){unlink($newnam);}
             } #while (<*.html>)
#




# Subroutine to replace accents by special html codes.
#       call: $R=&html($R);
#
sub html {
        my($R)=@_;
        $R =~ s/\xe9/&eacute;/g;
        $R =~ s/\xe8/&egrave;/g;
        $R =~ s/\xea/&ecirc;/g;
        $R =~ s/\xf9/&ugrave;/g;
        $R =~ s/\xe0/&agrave;/g;
        $R =~ s/\xe2/&acirc;/g;
        $R =~ s/\xee/&icirc;/g;
        $R =~ s/\xf4/&ocirc;/g;
        $R =~ s/\xfb/&ucirc;/g;
        $R =~ s/\xe7/&ccedil;/g;
        return $R;
           }

# Subroutine to replace html codes by accents.
#       call: $R=&accent($R);
#
sub accent {
        my($R)=@_;
        $R =~ s/&eacute;/\xe9/g;
        $R =~ s/&egrave;/\xe8/g;
        $R =~ s/&ecirc;/\xea/g;
        $R =~ s/&ugrave;/\xf9/g;
        $R =~ s/&agrave;/\xe0/g;
        $R =~ s/&acirc;/\xe2/g;
        $R =~ s/&icirc;/\xee/g;
        $R =~ s/&ocirc;/\xf4/g;
        $R =~ s/&ucirc;/\xfb/g;
        $R =~ s/&ccedil;/\xe7/g;
        return $R;
           }

# Subroutine to give help
sub usage {
        print "Usage : $0 -dah data_file (or wild card entry) \n";
	print " \t-Options: -d where d is for debug mode  \n";
        print " \t          -v where v is for verbose mode  \n"; 
        print " \t          -h to replace accents by html codes, and \n"; 
        print " \t          -a for the reverse operation \n";
        }













