#!/usr/local/bin/perl -w
@REM=("
@perl -S %0.bat %1 %2 %3 %4 %5 %6 %7 %8 %9
@goto end ") if 0 ;
#
# Fix and convert tape archives
#
# (C) Copyright 1990,91 Diomidis Spinellis.  All rights reserved.
#
# Permission to use, copy, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation.
# 
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# dds@doc.ic.ac.uk
#
# This program is not efficient.
# A have tried instead to make it readable, flexible and easy to use.
# For this I use lots of subroutines, local variables and higher order 
# functions.  If you want something efficient rewrite it in C.
#
# You can very easily add a new conversion mode.  Just add the conversion
# function.  If the conversion function needs to be applied to every
# filename component add a map function and a function to prepare a component
# to have acounter added to it.
# If you need to convert another part of the header (e.g. uid) search for the
# string HEADERMOD in this file.

do 'getopts.pl' || die "$0: Unable to find getopts library: $!\n";
&Getopts('mncaf:t:s:e') || $usage++;

$count = 0;
$proc = 'copy';

if ($opt_c) {
	$proc = 'canonic';
	$count++;
}
if ($opt_a) {
	$proc = 'noabs';
	$count++;
}
if ($opt_f) {
	$proc = 'from_' . $opt_f;
	$count++;
}
if ($opt_t) {
	$proc = 'to_' . $opt_t;
	$count++;
}
if ($opt_s) {
	$proc = 'usershort';
	$usershortlen = $opt_s;
	$count++;
}

if ($count > 1) {
	print STDERR "$0: Only one of -c -a -f -t -s can be specified\n";
	$usage++;
}

if (! eval('&' . $proc . '("foo");')) {
	print STDERR "$0: Bad option specified $proc\n";
	$usage++;
}
delete $map{'foo'};


if ($usage) {
	print STDERR "Usage: $0 -n -c -a -s N -f msdos|vms -t msdos|v7";
	print STDERR "
	-n Do not work on tar files.  Read and print a list of file names.
	-c Canonicalise filenames by removing /../, /./ and //.
	-a Fix absolute filenames by removing leading /.
	-s Convert filenames to the specified length N.
	-f Convert from format.  Format can be msdos or vms.
	-t Convert to format.  Format can be v7 (7th Edition) or msdos.
	-m Print a map table containing initial and final name on stderr.
	-e Remove trailing EOF

	Only one of -c -a -f -t -s can be specified.\n";
	exit 1;
}

if ($opt_n) {
	while (<>) {
		s/\n$//;
		print &$proc($_), "\n";
	}
} else {
	&copytar();
}

if ($opt_m) {
	while (($from, $to) = each(%map)) {
		print STDERR "$from $to\n";
	}
}

exit 0;

# Remove absolute file names.
# We canonicalise since foo//bar is /bar on many Unixes
sub noabs {
	local($name) = $_[0];

	 $name = &canonic($name);
	 $name =~ s/^\///;
	 return $name;
}

# Convert to MS-DOS
# - Shorten name to 8 characters
# - Remove all dots, but the last one
# - Shorten extension to 3 characters
# - Convert ,=+<>|; *?:[]\" to ^
# - Convert device name (con, aux ...) to _device
sub to_msdos {
	local($nm);

	$nm = $_[0];
	return &filemap($nm, 'mapmsdos', 'countprepmsdos');
	# The following line fails on perl 3.0 patchlevel 18 XXX
	# return &filemap($_[0], 'mapmsdos', 'countprepmsdos');
}

# Shortening routine for MS-DOS
sub mapmsdos {
	local($name) = $_[0];
	local($ext);

	# Leave only the last dot
	while ($name =~ s/(.*)\.(.*)\.(.*)/\1_\2.\3/g) { 
		;
	}
	# Convert funny characters to ^
	$name =~ s/[,=+<>|; *?:[\]\\]/^/g;
	# Shorten the name
	if ($name =~ m/\./) {
		($name, $ext) = split(/\./, $name);
		$name =~ s/^((con)|(aux[1-4]?)|(prn)|(lpt[1-3])|(clock\$))$/_\1/i;
		return &shorten($name, 8) . '.' . &shorten($ext, 3);
	} else {
		$name =~ s/^((con)|(aux[1-4]?)|(prn)|(lpt[1-3])|(clock\$))$/_\1/i;
		return &shorten($name, 8);
	}
}

# Count preparation routine for MS-DOS
sub countprepmsdos {
	local($name) = $_[0];
	local($ext);
	
	if ($name =~ m/\./) {
		($name, $ext) = split(/\./, $name);
		return $name . '.' . substr($ext, 0, 1);
	} else {
		return $name . '.';
	}
}


# Convert to 7th Edition type filesystems
# - Shorten filenames to 14 characters
sub to_v7 {
	local($nm);

	$nm = $_[0];
	return &filemap($nm, 'mapv7', 'countprepv7');
}

# Shortening routine for V7
sub mapv7 {
	return &shorten($_[0], 14);
}

# Count preparation routine for V7
sub countprepv7 {
	return substr($_[0], 0, 12);
}

# Shorten the filename components by a user specified amount
sub usershort {
	local($nm);

	$nm = $_[0];
	return &filemap($nm, 'mapusershort', 'countprepusershort');
}

# Shortening routine for usershort
sub mapusershort {
	return &shorten($_[0], $usershortlen);
}

# Count preparation routine for usershort
sub countprepusershort {
	return substr($_[0], 0, $usershortlen);
}

# Convert from VMS
# - Convert uppercase to lowercase
# - Remove leading device name: or node::
# - Convert directory form [xxx] to xxx/
# - Remove trailing generation number
# - Remove quoting characters ^V and " (XXX)
# NOTE:  I am an ignorant on VMS, so this probably need fixing.  UNTESTED
sub from_vms {
	local($name) = $_[0];

	$name =~ tr/[A-Z]\\/[a-z]\//;
	$name =~ s/^[a-z]*::?//;
	$name =~ s/\[(.*)\](.*)/\1\/\2/;
	$name =~ s/;[0-9]+$//;
	$name =~ s/["\026]//g;
	return $name;
}

# Convert from MS-DOS
# - Convert \ to /
# - Convert uppercase to lowercase
# - Remove leading device names
sub from_msdos {
	local($name) = $_[0];

	$name =~ tr/[A-Z]\\/[a-z]\//;
	$name =~ s/^[a-z]://;
	return $name;
}

# filemap(name, mapfunc, countprepfunc)
# Go through every path element of name substituting it with the result
# of mapfunc(element).  If the filename is already used then substitute it
# with the result of applying countprepfunc to with a two letter counter 
# appended.
# Two associative arrays are kept to avoid the chance of re-using a name
# %map contains the mappings from big names to small names
# %used contains 1 for every short name that has been used
# We keep partial file names to speed up the process
# The filenames are always canonicalised
sub filemap {
	local(
		@big,		# Contains components of original
		@small,		# Result is built in here
		@bigpart,	# Part of big that has been done
		@s,		# To try alternative mappings
		$name,		# Part of path we are dealing with
		$count,		# To create distinct names
		$try,		# Remember map result
		$mapfunc,	# Function to create new elements
		$countprepfunc	# Function to prepare for counting

	);

	$mapfunc = $_[1];
	$countprepfunc = $_[2];
	@big = split(/\//, &canonic($_[0]));
	@small = @bigpart = ();
	while (defined($name = shift(@big))) {
		push(@bigpart, $name);
		if (defined($try = $map{join('/', @bigpart)})) {
			# Found in map
			@small = split(/\//, $try);
			# The next line is needed because of buggy split
			# split(/x/, '') should give ('') not ()
			@small = ('') if $#small == -1;
		} else {
			# Create new map
			# Even if the name is short we may have used it up
			# by shortening up a bigger one, so we may have to
			# count
			$name = &$mapfunc($name);
			$count = '';
			while ($used{join('/', @s = (@small, $name . $count))}) {
				if ($count eq '') {
					$name = &$countprepfunc($name);
					$count = 'AA';
				} else {
					$count++;
				}
			}
			@small = @s;
			$used{join('/', @small)} = 1;
			$map{join('/', @bigpart)} = join('/', @small);
		}
	}
	return join('/', @small);
}

#
# Convert a single string to something close to it with length up
# to length given
sub shorten {
	local($str, $len) = @_;

	# Do "fonetic speling" from end to beginning
	while (
		length($str) > $len && (
			$str =~ s/(.*)([fglmnprst])\2(.*)/\1\2\3/i ||
			$str =~ s/(.*)(ou)(.*)/\1u\3/i ||
			$str =~ s/(.*)(ck)(.*)/\1k\3/i ||
			$str =~ s/(.*)(ks)(.*)/\1x\3/i ||
			$str =~ s/(.*)(sh)(.*)/\1s\3/i ||
			$str =~ s/(.*)(ph)(.*)/\1f\3/i ||
			$str =~ s/(.*)(oo)(.*)/\1u\3/i
		)
	) { ; }
	# Remove lowercase vowels from the end to the beginning
	while (
		length($str) > $len && 
		$str =~ s/(.*)[aeiou](.*)/\1\2/
	) { ; }
	# Remove uppercase vowels from the end to the beginning
	while (
		length($str) > $len && 
		$str =~ s/(.*)[AEIOU](.*)/\1\2/
	) { ; }
	# Finally cut characters from the end
	$str = substr($str, 0, $len);
	return $str;
}

# Create a canonic file name out of one containing .. and . 
# Employ Unix semantics: empty file means root directory.
sub canonic {
	local(@comp, @can);

	@comp = split(/\//, $_[0]);
	for ($i = 0; $i <= $#comp; $i++) {
		if ($comp[$i] eq '.') {
			;
		} elsif ($comp[$i] eq '') {
			@can = ();
			push(@can, '');
		} elsif ($comp[$i] eq '..') {
			pop(@can);
		} else {
			push(@can, $comp[$i]);
		}
	}
	return join('/', @can);
}

# A do nothing procedure
sub copy {
	return $_[0];
}

# Copy a tape archive from stdin to stdout
sub copytar {
	binmode STDIN;
	binmode STDOUT;

	forloop: for(;;) {
		read(STDIN, $header, 512) == 512 || die "$0: Couldn't read header: $!\n";
		if ($header eq "\0" x 512) {
			last forloop;
		}
		($name, $mode, $uid, $gid, $size, $mtime, $checksum, $linkflag, $linkname) = unpack("a100 A7x A7x A7x A12 A12 a8 a1 a100", $header);
		#
		# Header modification code should be put here
		# HEADERMOD
		$name =~ s/[\000 ]*//g;
		$name = &$proc($name);
		if ($linkflag != "\0") {
			$linkname =~ s/[\000 ]*//g;
			$linkname = &$proc($linkname);
		}

		# Create dummy header for checksum calculation (checksum is blanks)
		$hnew = pack("a99x A6a1x A6a1x A6a1x A11a1 A11a1 a8 a1 a99x x255", 
			($name, $mode, ' ', $uid, ' ', $gid, ' ', $size, ' ', $mtime, ' ', ' ' x 8, $linkflag, $linkname));
		$sz = $size;
		$sz =~ s/ *//g;
		$sz = oct($sz);
		$checksum =~ s/ *//g;
		$checksum = oct($checksum);
		$newcheck = &check($hnew);
		# Create the header with the new checksum
		$hnew = pack("a99x A6a1x A6a1x A6a1x A11a1 A11a1 a6xa1 a1 a99x x255", 
			($name, $mode, ' ', $uid, ' ', $gid, ' ', $size, ' ', $mtime, ' ', sprintf('%6o', $newcheck), ' ', $linkflag, $linkname));
		print STDOUT $hnew;
		# Copy contents
		for ($i = 0; $i < $sz; $i += 512) {
			read(STDIN, $contents, 512) == 512 || die "$0: Couldn't read data: $!\n";
			print STDOUT $contents;
		}
		#seek(STDIN, (int($sz / 512) + 1) * 512, 1) unless $sz == 0;
	}
	# Write EOF
	if (!$opt_e) {
		print STDOUT pack("x512", ());
		print STDOUT pack("x512", ());
	}
}

# Return checksum for tar header block
sub check {
	$h = $_[0];
	local($i, $s);

	$s = 0;
	for($i = 0; $i < 512; $i++) {
		$s += unpack('C', substr($h, $i, 1));
	}
	return $s;
}

"
:end ", 0;
