#!/usr/local/bin/perl
# Purpose: Rewrites 'IMG SRC=""' tags to include WIDTH= and HEIGHT= attributes.
# fiximg.pl version 1.1
#
# Written by Patrick Atoon.
# Bugs fixed by Greg Weir.
#
# The kinky part is that it parses the images to find out the true
# height and width.  Works only for JPG and GIF, though.  Other images
# are left alone.
#
# Usage: fiximg.pl [ -h | <file> [...] ]
#
# No fear; a backup will be made under the name <file>.bak,
# just in case this messes up things badly.
#
# NOTE(review): the stored copy of this script had its line breaks collapsed
# and every angle-bracketed fragment stripped.  The <IMG ...> matching code
# in fix_line() and the help text in print_help() were reconstructed from the
# surviving code and comments -- confirm against a pristine copy if available.

use strict;
use warnings;

# Where to find the web-documents, same as httpd's DocumentRoot.
# CHANGE THIS TO YOUR DOCUMENTROOT!
our $DocumentRoot = "/www";    # This is what I use on my home UNIX machine. -Joe

# JPEG marker codes (second byte of an 0xFF-prefixed marker).
use constant M_SOF0  => 0xC0;    # first start-of-frame marker
use constant M_DHT   => 0xC4;    # Huffman table: inside the SOF range, not a frame
use constant M_JPG   => 0xC8;    # reserved:      inside the SOF range, not a frame
use constant M_DAC   => 0xCC;    # arithmetic conditioning: inside range, not a frame
use constant M_SOF15 => 0xCF;    # last start-of-frame marker
use constant M_SOI   => 0xD8;    # start of image
use constant M_EOI   => 0xD9;    # end of image
use constant M_SOS   => 0xDA;    # start of scan (end of header)

# Run only when executed as a script, so the subroutines below can be loaded
# with "require" (e.g. for testing) without touching @ARGV.
unless (caller) {
    main(@ARGV);
    exit;
}

#########################################################################
#
# Subroutines
#

# Entry point.  Prints the help text when called without arguments or with
# "-h" as the first argument; otherwise fixes every file named in the list.
sub main {
    my @files = @_;

    # Should we give some info?
    if (!@files || $files[0] eq "-h") {
        print_help();
        return;
    }

    for my $infile (@files) {
        print "Fixing \"$infile\"\n";
        fix_file($infile);
    }
    return;
}

# Rewrites one HTML file in place.  The new content is first written to
# "<file>.tmp"; the original is then kept as "<file>.bak".
# Returns 1 on success, 0 (after printing a message) on any failure.
sub fix_file {
    my ($infile) = @_;
    my $outfile = $infile . ".tmp";

    # Directory part of $infile (with trailing slash); relative image
    # paths are resolved against it.
    my $slash = rindex($infile, "/");
    my $fileroot = $slash >= 0 ? substr($infile, 0, $slash + 1) : "./";

    my $in;
    if (!open($in, '<', $infile)) {
        print "Cannot open file \"$infile\"\n";
        return 0;
    }
    my $out;
    if (!open($out, '>', $outfile)) {
        print "Cannot open file \"$outfile\"\n";
        close($in);    # do not leak the input handle
        return 0;
    }

    # chomp (not chop) so that a final line without a newline keeps its
    # last character.  fix_line() may consume further lines from $in when
    # an IMG tag spans several lines.
    while (defined(my $line = <$in>)) {
        chomp($line);
        print {$out} fix_line($line, $in, $fileroot), "\n";
    }
    close($in);

    # Buffered write errors only surface at close time.
    if (!close($out)) {
        print "Cannot write file \"$outfile\": $!\n";
        unlink($outfile);
        return 0;
    }

    # rename() instead of a shell "mv": no shell interpretation of the
    # file names, and failures are detected.
    if (!rename($infile, "$infile.bak")) {
        print "Cannot back up \"$infile\" to \"$infile.bak\": $!\n";
        unlink($outfile);
        return 0;
    }
    if (!rename($outfile, $infile)) {
        print "Cannot replace \"$infile\" with \"$outfile\": $!\n";
        rename("$infile.bak", $infile);    # put the original back
        return 0;
    }
    return 1;
}

# Fixes every IMG tag found in $line and returns the rewritten text
# (without a trailing newline).
#   $line     - current input line, newline already removed
#   $in       - input file handle; read from when a tag continues on
#               following lines (those lines are joined with a space)
#   $fileroot - directory prefix for relative image paths
sub fix_line {
    my ($line, $in, $fileroot) = @_;
    my $done = "";    # text already processed

    while ($line =~ /<IMG\b/i) {
        my $begin = $-[0];    # offset of the "<" that opens the tag

        # Make sure the end of the IMG tag is also on this line.
        my $end;
        while (($end = index($line, ">", $begin)) < 0) {
            my $more = <$in>;
            last unless defined $more;    # EOF inside the tag: stop looking
            chomp($more);
            $line .= " " . $more;         # Add a safety space
        }
        last if $end < 0;    # unterminated tag: emit the rest untouched

        # Preserve the bit in front of this IMG tag, cut out the tag, and
        # carry on with whatever follows it.
        $done .= substr($line, 0, $begin);
        my $img_tag = substr($line, $begin, $end - $begin + 1);
        $line = substr($line, $end + 1);

        $done .= fix_img_tag($img_tag, $fileroot);
    }
    return $done . $line;
}

# Returns $img_tag with WIDTH and HEIGHT set to the true size of the image
# named by its SRC attribute.  The tag is returned unchanged when it has no
# SRC, when the image is neither JPEG nor GIF, or when its size cannot be
# determined (so existing attributes are never thrown away for nothing).
sub fix_img_tag {
    my ($img_tag, $fileroot) = @_;

    # SRC value may be double-quoted, single-quoted or bare.
    return $img_tag
        unless $img_tag =~ /\sSRC\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+))/i;
    my $src = $3;
    if (defined $1) {
        $src = $1;
    }
    elsif (defined $2) {
        $src = $2;
    }

    # Absolute paths live under the document root, relative ones next to
    # the HTML file.
    my $filename = ($src =~ m{^/}) ? $DocumentRoot . $src : $fileroot . $src;

    my $wh = "";
    if ($filename =~ /\.jpe?g$/i) {
        $wh = JPEG_size($filename);
    }
    elsif ($filename =~ /\.gif$/i) {
        $wh = GIF_size($filename);
    }
    return $img_tag if $wh eq "";

    # Throw away old width and height, quoted or not.
    $img_tag =~ s/\s+(?:WIDTH|HEIGHT)\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]*)//gi;

    # Insert before ">" (or before "/>" for self-closing tags).
    if ($img_tag =~ s{\s*/>\z}{}) {
        $img_tag .= " $wh />";
    }
    else {
        $img_tag =~ s{\s*>\z}{ $wh>};
    }
    return $img_tag;
}

# Determines the width and height in pixels of a JPEG image.
# Original written by Marcus E. Hennecke.
# Returns "WIDTH=<w> HEIGHT=<h>", or "" (after printing a message where
# appropriate) when the file cannot be opened or parsed.
sub JPEG_size {
    my ($fn) = @_;

    my $fh;
    if (!open($fh, '<', $fn)) {
        print "Could not open file \"$fn\"!\n";
        return "";
    }
    binmode($fh);
    my $size = _jpeg_size($fh, $fn);
    close($fh);
    return $size;
}

# True when $marker is a start-of-frame marker.  0xC4, 0xC8 and 0xCC lie
# inside the SOF0..SOF15 range but are not frame headers.
sub _is_sof_marker {
    my ($marker) = @_;
    return 0 if $marker < M_SOF0 || $marker > M_SOF15;
    return 0 if $marker == M_DHT || $marker == M_JPG || $marker == M_DAC;
    return 1;
}

# Walks the JPEG header on the open handle $fh; $fn is used for messages
# only.  Returns the size string or "".
sub _jpeg_size {
    my ($fh, $fn) = @_;

    # A JPEG file starts with X'FF', SOI.
    my $c1 = read_1_byte($fh);
    my $c2 = read_1_byte($fh);
    if ($c1 != 0xFF || $c2 != M_SOI) {
        print "\"$fn\" is not a JPEG file!\n";
        return "";
    }

    # Go through the markers in the header.  Stop when height and width are
    # determined or when end of header is reached.
    while (1) {
        # Get the next marker: skip garbage up to 0xFF, then any 0xFF fill
        # bytes.  read_1_byte() returns -1 at EOF, which ends both loops.
        my $garbage = 0;
        $c1 = read_1_byte($fh);
        while ($c1 >= 0 && $c1 != 0xFF) {
            $garbage++;
            $c1 = read_1_byte($fh);
        }
        while ($c1 == 0xFF) {
            $c1 = read_1_byte($fh);
        }
        if ($c1 < 0) {
            print "Premature EOF in JPEG file \"$fn\"!\n";
            return "";
        }
        if ($garbage) {
            print "Warning: garbage data found in JPEG file \"$fn\"\n";
        }

        # What type marker are we looking at?
        if (_is_sof_marker($c1)) {
            # Frame header: length, precision, height, width.
            my $frame = read_n_bytes($fh, 7, $fn);
            return "" unless defined $frame;
            my ($len, $precision, $h, $w) = unpack("nCnn", $frame);
            return "WIDTH=${w} HEIGHT=${h}";
        }
        elsif ($c1 == M_SOS || $c1 == M_EOI) {
            # Did we reach header end?
            return "";
        }
        else {
            # Otherwise, skip this variable-length segment.  The length
            # field counts its own two bytes.
            my $len_bytes = read_n_bytes($fh, 2, $fn);
            return "" unless defined $len_bytes;
            my $len = unpack("n", $len_bytes) - 2;
            if ($len < 0) {
                print "Erroneous JPEG marker length in file \"$fn\"!\n";
                return "";
            }
            return "" unless defined read_n_bytes($fh, $len, $fn);
        }
    }
}

#
# Determine the size of a GIF file
#
# Returns "WIDTH=<w> HEIGHT=<h>" taken from the first Image Descriptor, or
# "" (after printing a message where appropriate) on failure.
sub GIF_size {
    my ($fn) = @_;

    my $fh;
    if (!open($fh, '<', $fn)) {
        print "Could not open file \"$fn\"!\n";
        return "";
    }
    binmode($fh);
    my $size = _gif_size($fh, $fn);
    close($fh);
    return $size;
}

# Walks the GIF blocks on the open handle $fh; $fn is used for messages
# only.  Returns the size string or "".
sub _gif_size {
    my ($fh, $fn) = @_;

    my $signature = read_n_bytes($fh, 6, $fn);
    return "" unless defined $signature;
    if ($signature ne "GIF87a" && $signature ne "GIF89a") {
        print "\"$fn\" is not a GIF file!\n";
        return "";
    }

    # Examine the Logical Screen Descriptor; only the packed-fields byte
    # (third item) is needed here.
    my $lsd = read_n_bytes($fh, 7, $fn);
    return "" unless defined $lsd;
    my $packed = (unpack("vvCCC", $lsd))[2];

    # Is it followed by a Global Color Table?
    if ($packed & 0x80) {
        # Skip the Global Color Table: 3 bytes per entry, 2^(bits+1) entries.
        my $gct_bits = $packed & 0x07;
        return "" unless defined read_n_bytes($fh, 3 * (2 << $gct_bits), $fn);
    }

    # Go through the blocks.  Stop when height and width are determined or
    # when anything other than an extension shows up.
    while (1) {
        my $c = read_1_byte($fh);

        if ($c == 0x21) {
            # This is an Extension.  Read the label, then skip all of its
            # Data Sub-blocks; a zero-length sub-block terminates it.
            my $label = read_1_byte($fh);
            while (1) {
                my $blksize = read_1_byte($fh);
                if ($blksize < 0) {
                    print "Premature EOF in image file \"$fn\"!\n";
                    return "";
                }
                last if $blksize == 0;
                return "" unless defined read_n_bytes($fh, $blksize, $fn);
            }
        }
        elsif ($c == 0x2c) {
            # This is the most holy of all... The Image Descriptor.
            my $descriptor = read_n_bytes($fh, 9, $fn);
            return "" unless defined $descriptor;
            my ($left, $top, $w, $h, $flags) = unpack("vvvvC", $descriptor);
            return "WIDTH=${w} HEIGHT=${h}";
        }
        else {
            # Trailer, unknown block or EOF (-1): no size found.
            return "";
        }
    }
}

# Reads one byte from $fh.  Returns its value (0..255), or -1 at EOF so
# that callers can tell a zero byte from the end of the file.
sub read_1_byte {
    my ($fh) = @_;
    my $ch = getc($fh);
    return defined $ch ? ord($ch) : -1;
}

# Reads exactly $n bytes from $fh and returns them.  On a short read or a
# read error, prints a message naming $fn and returns undef.
sub read_n_bytes {
    my ($fh, $n, $fn) = @_;
    my $buf = "";
    my $got = read($fh, $buf, $n);
    if (!defined $got || $got != $n) {
        print "Premature EOF in image file \"$fn\"!\n";
        return;
    }
    return $buf;
}

# Make a signed short unsigned.  The "n" and "v" unpack formats already
# yield unsigned values, so the size parsers no longer need this; it is
# kept for anything that still calls it.
sub ushort {
    my ($n) = @_;
    $n += 65536 if $n < 0;
    return $n;
}

# Prints the usage message.
sub print_help {
    print <<'EOF';
Usage: fiximg.pl [ -h | <file> [...] ]

Rewrites the IMG tags in each <file> to include WIDTH= and HEIGHT=
attributes taken from the images themselves (JPG and GIF only; other
images are left alone).

  -h    show this help text

A backup of every changed file is kept as <file>.bak.
EOF
    return;
}