#!/usr/bin/perl
########################################################################
#
# $Id$
#
# Id: code_auto_detect,v 1.10 2002/12/02 18:59:36 gosha Exp 
#
# Copyright (C) 2001-2002 Okunev Igor <gosha@prv.mts-nn.ru>
#
########################################################################
use strict;

use lib qw( /opt/VirtualPBX/lib );

use Cyrillic::CPdetect;
use Cyrillic::CPconvert;

use File::Basename;

# Scratch variables shared by the rest of the script.
my ( $len_file, $code_page, $cnt, $data_offset, $data_str, $ptr, $my_name );

# The command line is taken verbatim as a flat list of "-option value" pairs.
my %ARGS = @ARGV;

# Short code page names that may appear in the program name, mapped to the
# code page names stored in -s / -d.
my %ABBR = (
		dos		=> 'IBM866',
		win		=> 'CP1251',
		koi		=> 'KOI8-R',
		iso		=> 'ISO-8859-5',
		utf8	=> 'UTF8'
	);

# Work out the options implied by a program name of the form "<src>2<dst>"
# (for example "dos2koi").
#
# Returns a list of option => value pairs:
#   -s / -d   only when <src> / <dst> is a key of %ABBR;
#   -n        2 for dos|win -> koi|iso, 1 for koi|iso -> dos|win.
# An unknown abbreviation contributes nothing, so a name that merely happens
# to contain a "2" can no longer replace -s / -d from the command line with
# an undefined value.
sub options_from_name {
	my ($name) = @_;
	my %opt;

	return %opt unless $name =~ /(.+)2(.+)/;
	my ( $from, $to ) = ( $1, $2 );

	$opt{'-s'} = $ABBR{$from} if exists $ABBR{$from};
	$opt{'-d'} = $ABBR{$to}   if exists $ABBR{$to};

	my %crlf_side = ( dos => 1, win => 1 );
	my %lf_side   = ( koi => 1, iso => 1 );
	if ( $crlf_side{$from} && $lf_side{$to} ) {
		$opt{'-n'} = 2;
	} elsif ( $lf_side{$from} && $crlf_side{$to} ) {
		$opt{'-n'} = 1;
	}

	return %opt;
}

$my_name = basename($0);

my %implied = options_from_name($my_name);
for my $opt ( keys %implied ) {
	# An explicit -n on the command line wins over the implied one, whereas
	# an implied -s / -d replaces whatever the command line said.
	next if $opt eq '-n' && exists $ARGS{'-n'};
	$ARGS{$opt} = $implied{$opt};
}

# Choose the destination code page from the host OS unless -d already has a
# true value. The "win" test must not fire on macOS, whose $^O is "darwin"
# and would otherwise be treated as Windows.
my $os_default_cp;
if ( $^O =~ /win/i && $^O !~ /^darwin/i ) {
	$os_default_cp = 'CP1251';
} elsif ( $^O =~ /dos/i || $^O =~ /os2/i ) {
	$os_default_cp = 'IBM866';
} else {
	$os_default_cp = 'KOI8-R';
}
$ARGS{'-d'} ||= $os_default_cp;

# -t gates the "CODEPAGE = ..." report on STDERR and is also handed to the
# detector. It defaults to 0 when the source code page is given explicitly
# (-s) and to 1 otherwise.
unless ( exists $ARGS{'-t'} ) {
	$ARGS{'-t'} = $ARGS{'-s'} ? 0 : 1;
}

# -b is the number of 1000-byte samples taken from the input for detection.
$ARGS{'-b'} = 10 unless exists $ARGS{'-b'};

# The candidate list handed to the detector is fixed; any -c given on the
# command line is overwritten here.
$ARGS{'-c'} = 'IBM866,CP1251,KOI8-R,UTF8';

# Ask Cyrillic::CPdetect for the code page of the sample text in $_[0].
# $_[0] is used directly so that a large input is not copied just to be
# inspected. The -t value and the comma-separated -c candidates are passed
# through unchanged.
sub detect_code_page {
	return Cyrillic::CPdetect->cp_detect(
		$_[0], $ARGS{'-t'}, split( /\s*,\s*/, $ARGS{'-c'} )
	);
}

# Build the converter from code page $from to code page $to.
# Returns a code reference that rewrites its argument in place, or nothing
# when no conversion applies: $from is empty, $from equals $to, or
# Cyrillic::CPconvert does not hand back a code reference (reported on
# STDERR as before).
sub build_converter {
	my ( $from, $to ) = @_;

	return if $from eq '' || $from eq $to;

	my $conv = Cyrillic::CPconvert->new( $from, $to );
	return $conv if defined $conv and ref($conv) eq 'CODE';

	print STDERR "Unknown codepage...\n";
	return;
}

# Rewrite line endings of the string referenced by $text_ref in place:
# $mode 1 turns every "\n" into "\r\n", $mode 2 turns "\r\n" into "\n",
# any other value leaves the text alone.
sub convert_line_endings {
	my ( $text_ref, $mode ) = @_;

	if ( $mode == 1 ) {
		$$text_ref =~ s#\n#\r\n#g;
	} elsif ( $mode == 2 ) {
		$$text_ref =~ s#\r\n#\n#g;
	}
	return;
}

# Cut off and return the part of the buffer referenced by $buf_ref that
# cannot be processed until the next chunk arrives:
#   - a trailing "\r" when $hold_cr is true (its "\n" may come next);
#   - a trailing, incomplete UTF-8 sequence when $utf8 is true.
# Returns '' when the buffer ends on a clean boundary.
sub hold_back_tail {
	my ( $buf_ref, $utf8, $hold_cr ) = @_;
	my $tail = '';

	if ( $hold_cr && $$buf_ref =~ /\r\z/ ) {
		$tail = "\r";
	} elsif ( $utf8 && $$buf_ref =~ /([\xC0-\xFF][\x80-\xBF]*)\z/ ) {
		my $seq  = $1;
		my $lead = ord $seq;
		# Sequence length announced by the lead byte.
		my $want = $lead >= 0xF0 ? 4 : $lead >= 0xE0 ? 3 : 2;
		$tail = $seq if length($seq) < $want;
	}

	substr( $$buf_ref, -length($tail), length($tail), '' ) if length $tail;
	return $tail;
}

# Main step: find the source code page (-s, or auto-detection on a sample
# of the input), convert the input to the -d code page, apply the -n line
# ending rewrite and print the result to STDOUT. Input is the file named
# by -f when that is a plain file, otherwise all of STDIN.
unless ( -f $ARGS{'-f'} ) {
	{
		local $/;
		$data_str = <STDIN>;
	}
	$data_str = '' unless defined $data_str;

	if ( $ARGS{'-s'} ) {
		$code_page = $ARGS{'-s'};
	} else {
		my $parts = $ARGS{'-b'};
		if ( $parts > 0 && length($data_str) > $parts * 1000 ) {
			# Large input: detect on $parts evenly spaced 1000-byte
			# samples, each followed by a blank.
			my $step   = int( length($data_str) / $parts );
			my $sample = join '',
				map { substr( $data_str, $step * $_, 1000 ) . ' ' } 0 .. $parts - 1;
			$code_page = detect_code_page($sample);
		} else {
			$code_page = detect_code_page($data_str);
		}
	}

	print STDERR "CODEPAGE = $code_page\n" if $ARGS{'-t'};

	$ptr = build_converter( $code_page, $ARGS{'-d'} );
	$ptr->($data_str) if $ptr;
	convert_line_endings( \$data_str, $ARGS{'-n'} );
	print $data_str;
} else {
	my $file = $ARGS{'-f'};
	$len_file = -s $file;

	# Three-argument open: the file name is never parsed for a mode.
	open( my $fh, '<', $file ) or die "Can't load data [$!]\n";

	if ( $ARGS{'-s'} ) {
		$code_page = $ARGS{'-s'};
	} else {
		my $parts = $ARGS{'-b'};
		if ( $parts <= 0 || $len_file <= $parts * 1000 ) {
			# Small file, or sampling switched off: detect on everything.
			read( $fh, $data_str, $len_file );
		} else {
			my $step = int( $len_file / $parts );
			for my $i ( 0 .. $parts - 1 ) {
				my $piece;
				seek( $fh, $step * $i, 0 );
				read( $fh, $piece, 1000 );
				$data_str .= "$piece ";
			}
		}
		$code_page = detect_code_page($data_str);
	}
	seek( $fh, 0, 0 );

	print STDERR "CODEPAGE = $code_page\n" if $ARGS{'-t'};

	$ptr = build_converter( $code_page, $ARGS{'-d'} );

	# The file is streamed in 4096-byte chunks, so a "\r\n" pair or a
	# multi-byte UTF-8 character can straddle two reads. The unfinished
	# tail is held back and prepended to the next chunk.
	# NOTE(review): assumes the converter is stateless between calls --
	# confirm against Cyrillic::CPconvert.
	my $utf8_in = ( $ptr && $code_page =~ /^utf-?8$/i ) ? 1 : 0;
	my $hold_cr = ( $ARGS{'-n'} == 2 ) ? 1 : 0;
	my $held    = '';
	my $chunk;

	while ( read( $fh, $chunk, 4096 ) ) {
		$chunk = $held . $chunk;
		$held  = hold_back_tail( \$chunk, $utf8_in, $hold_cr );
		next unless length $chunk;

		$ptr->($chunk) if $ptr;
		convert_line_endings( \$chunk, $ARGS{'-n'} );
		print $chunk;
	}

	# Whatever is still held at end of file has no continuation; emit it.
	if ( length $held ) {
		$ptr->($held) if $ptr;
		convert_line_endings( \$held, $ARGS{'-n'} );
		print $held;
	}

	close($fh);
}
