#!/usr/bin/perl -w
=pod

=head1 NAME

tv_grab_za - Grab TV listings for South Africa.

=head1 SYNOPSIS

tv_grab_za --help

tv_grab_za [--config-file FILE] --configure [--gui OPTION]

tv_grab_za [--config-file FILE] [--output FILE] [--days N]
	   [--quiet] [--retries N]

=head1 DESCRIPTION

Output TV listings for DSTV channels available in South Africa.
The data comes from www.dstv.com. The grabber relies on
parsing HTML so it might stop working at any time.

First run B<tv_grab_za --configure> to choose, which channels you want
to download. Then running B<tv_grab_za> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels,
and write the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_za.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  Can be 1, 7, 14 or 30.  Default is 14

B<--quiet> suppress the progress messages normally written to standard
error.

B<--retries> number of retries before failing channel download.

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHORS
Chris Picton <cpicton@users.sf.net>
Neil Garratt <ngarratt@users.sf.net>

Based on tv_grab_fi by Matti Airas.

Latest version always available at http://xmltv.cvs.sourceforge.net/xmltv/xmltv/grab/za/

=head1 BUGS

Does not automatically update itself, when DSTV changes their site

=cut

######################################################################
# initializations

use strict;

use XMLTV::Version '$Id: tv_grab_za,v 1.42 2011/05/12 07:05:51 dekarl Exp $ ';
use XMLTV::Capabilities qw/baseline manualconfig cache/;
use XMLTV::Description 'South Africa';


use Getopt::Long;
use List::Util qw(min);
use List::Util qw(max);
use Date::Manip;
use HTML::TreeBuilder;
use HTML::Entities; # parse entities
use IO::File;
use Digest::MD5 qw(md5 md5_hex);

use POSIX qw(strftime);


#use LWP::Simple qw($ua);
use LWP::Simple;
use LWP::UserAgent;
use HTTP::Request::Common qw(GET);
use HTTP::Cookies;



use XMLTV;
use XMLTV::Memoize;
use XMLTV::ProgressBar;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::DST;
use XMLTV::Get_nice;
use XMLTV::Mode;
use XMLTV::Date;
# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get South African television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
		[--quiet] [--retries]
END
  ;

# Attributes of the root element in output.
my $HEAD = { 'source-info-url'	 => 'http://www.dstv.com/',
			 'source-data-url'	 => "http://www.dstv.com/dstv-guide/default.asp",
			 'generator-info-name' => 'XMLTV',
			 'generator-info-url'  => 'http://xmltv.org/',
		   };

# The timezone in South Africa.
my $TZ="+0200";

# default language
my $LANG="en";

our %dstvchannelmap;
our %mnetchannelmap;

my %mnetchannelfixups = (
    'Africa Magic Channel (C-Band)' => 'AfricaMagic',
    'Bloomberg Information TV' => 'Bloomberg',
    'China Central Television 4' => 'CCTV 4',
    'Channel O - Sound Television' => 'Channel O',
    'CNBC' => 'CNBC Africa',
    'CNN International' => 'CNN',
    'Deukom - DW' => 'Deutchse Welle',
    'E-Entertainment' => 'E! Entertainment',
    'eTV' => 'e-TV',
    'Go (K-World Teen)' => 'GO',
    'Hallmark Entertainment Network' => 'Hallmark',
    'K-TV World' => 'K-All Day',
    'M-Net Domestic' => 'M-Net',
    'M-Net Series' => 'M-Net Series',
    'Parliamentary Service' => 'Parliamentary',
    'Reality TV' => 'Zone Reality',
    'Rhema Network' => 'Rhema TV',
    'Summit' => 'Summit TV',
    'SuperSport' => 'SuperSport 1',
    'SuperSport 3' => 'SuperSport 3 (Soccer)',
    'SuperSport 5' => 'SuperSport 5 (Highlights)',
    'SuperSport Zone Mosaic' => 'SuperSport Zone',
    'Trinity Broadcasting Network' => 'TBN',
    'Turner Classic Movies' => 'TCM',
    'TV5 Afrique' => 'TV5',
    'VH1' => 'VH-1',
	);

my %dstvchannelfixups = (
   'CNN International' => 'CNN',
   'Sony Entertainment Television' => 'Sony Entertainment',
   'SABC 1' => 'SABC1',
   'SABC 2' => 'SABC2',
   'SABC 3' => 'SABC3',
   'Crime & Investigation Network' => 'Crime & Investigation',
   'E! Entertainment Television' => 'E! Entertainment',
   'SuperSport MaXimo 1' => 'SuperSport Maximo',
   'MagicWorld' => 'Magic World',
   'Deutsche Welle' => 'Deutchse Welle'
   );

#These entries appear on the channel index page, but no schedules for them exist on the site.
my @dstvignorechannels = ('Soweto TV', 'Ignition');

#my %dstvfilehashes = (
#	'1494729404' => '0',
#	'3139098187' => '1',
#	'2091571851' => '2',
#	'2860538121' => '3',
#	'3348398793' => '4',
#	'1813599985' => '5',
#	'1153776246' => '6',
#	'1367985183' => '7',
#	'3033721747' => '8',
#	'2699942871' => '9',
#	);
my %dstvfilehashes = (
	'937c943580ac202fc64a80dbd3be3aab' => '0',
	'40154b2e17f12abc83304910e8b2c184' => '1',
	'261d6eeefee8ee6f398e8d4bef8b51df' => '2',
	'f0e730108d788a4fef7966157d223e12' => '3',
	'309cad2597b2273ecda6614169e79a78' => '4',
	'675fd8104b6fa3ae317cbdc7cb301400' => '5',
	'1d8960a26dce4fd9172a06154d66f692' => '6',
	'479765dcd17d683a3fdbcd5740e11c15' => '7',
	'6eeba41c618fdba24c8fd554023385a9' => '8',
	'f888465466ffa7c7c3cc6c5f12414ad3' => '9',
);

our %dstvtimehashes = ();

my $viewstate = '';
my $eventvalidation = '';

my $ua = initialise_ua();

# Set up cache if needed
XMLTV::Memoize::check_argv('get_url');

######################################################################
# Get options.
my ($opt_days, $opt_offset, $opt_help, $opt_output,
	$opt_configure, $opt_config_file, $opt_gui,
	$opt_quiet, $opt_list_channels, $opt_opentime, 
	$opt_opentime_combined, $opt_retries, $opt_mnet_fallback,
	$opt_dstv_fallback, $days_exceeded);
#$opt_days  = 14; # default
$opt_quiet  = 0; # default
GetOptions('days=i'		=> \$opt_days,
	   'offset=i'		   => \$opt_offset,
		   'help'		  => \$opt_help,
		   'configure'	 => \$opt_configure,
		   'opentime'	  => \$opt_opentime,
		   'opentime-combined'	  => \$opt_opentime_combined,
		   'config-file=s' => \$opt_config_file,
		   'gui:s'		 => \$opt_gui,
		   'output=s'	  => \$opt_output,
		   'quiet'		 => \$opt_quiet,
		   'retries'		 => \$opt_retries,
		   'mnet-fallback'		 => \$opt_mnet_fallback,
		   'list-channels'		 => \$opt_list_channels,
		  )
  or usage(0);

# dstv.com only allows us to grab one of a few fixed day ranges and
# they all start from today.  For baseline compliance, data outside 
# the range specified is stripped. mnet.co.za doesn't have this issue,
# so it's now the default site to use. An extra day is also downloaded
# just to calculate the end time of the last program of the previous day
#
my %allowed_days = (1 => 0, 7 => 1, 14 => 2);
die "--offset cannot be negative" if defined $opt_offset and $opt_offset < 0;
die "--days must be positive" if defined $opt_days and $opt_days <= 0;
if ($opt_offset) {
	$opt_days += $opt_offset;
} else { $opt_offset = 0; }

my $use_days;
if ($opt_days) {
    $use_days = min grep { $_ >= $opt_days } keys %allowed_days;
    if (not defined $use_days) {
        $opt_days = $use_days = max keys %allowed_days;
        warn "rounding down to $use_days days for download (must be one of ".(keys %allowed_days).")\n";
        $days_exceeded = 1;
    } elsif (($use_days != $opt_days) && (!$opt_quiet)) {
        warn "dstv.com only supports the following days: ".(keys %allowed_days).". $use_days day(s) will be downloaded and extraneous ones skipped\n";
    }
# OK, now $use_days has the number of days to grab starting from now,
# if that was specified on the command line. If this is specified in the 
# config file it will also set this variable. $opt_days will contain the 
# number of days we actually want to keep data for.

}


# Default retries = 3;
$opt_retries = 3 if !$opt_retries;

usage(1) if $opt_help;



XMLTV::Ask::init($opt_gui);

my $mode = XMLTV::Mode::mode('grab', # default
							 $opt_configure => 'configure',
							 $opt_list_channels => 'list-channels',
							);

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_za', $opt_quiet);


 
init_cookies();

if ($mode eq 'configure') {
	mode_configure();
	exit();
}

# Whatever we are doing, we need the channels data.
my %channels;
my %options;

read_config(\%channels);

if (not defined $use_days) {
    # Not got from command line or config file; default it.
    $opt_days = $use_days = 14;
}


#$opt_dstv_fallback = 1 if !(defined $opt_mnet_fallback  || defined $opt_dstv_fallback);



#######################################
# Options to be used for XMLTV::Writer.
my %w_args;
if (defined $opt_output) {
	my $fh = new IO::File(">$opt_output");
	die "cannot write to $opt_output: $!" if not defined $fh;
	$w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $writer = new XMLTV::Writer(%w_args);
$writer->start($HEAD);

if ($mode eq 'list-channels') {
	# Write channels mode.
	foreach my $id (keys %channels) {
		$writer->write_channel({id => $id, 'display-name' => [ [ $channels{$id}, 'en' ] ]});
	}
	$writer->end();
	exit();
}

######################################################################
# We are producing full listings.
die if $mode ne 'grab';


# Prepare channel maps

#get_mnet_channel_mappings() if defined $opt_mnet_fallback || $opt_dstv_fallback;

#get_dstv_channel_mappings();
#if (keys %dstvchannelmap == 0) {
# die "error: can't open channel map (http://www.dstv.com/DStv_Guide/default.aspx)";
#}
######################################################################
# begin main program


# Print out the channels
die "No channels specified, run me with --configure first\n"
  if not keys %channels;

foreach my $chanid (keys %channels) {
	my $n=$channels{$chanid};
	my $ch_xid="$chanid.dstv.com";
	$writer->write_channel({ id => $ch_xid, 'display-name' => [ [ $n , 'en' ] ] });
}

my $bar = new XMLTV::ProgressBar('getting listings', (scalar keys %channels))
  if not $opt_quiet;

foreach (keys %channels) {
	process_html($_);
	update $bar if not $opt_quiet;
}
$bar->finish() if not $opt_quiet;
$writer->end();

if (defined $days_exceeded) {
    $! = 9;
    die;
}

######################################################################
# subroutine definitions

# Use Log::TraceMessages if installed.
BEGIN {
	eval { require Log::TraceMessages };
	if ($@) {
		*t = sub {};
		*d = sub { '' };
	}
	else {
		*t = \&Log::TraceMessages::t;
		*d = \&Log::TraceMessages::d;
		Log::TraceMessages::check_argv();
	}
}

sub tidy( $ ) {
	for (my $tmp = shift) {
		if (not defined $tmp) { return };
		tr/\t\205/ /d;
		s/([^\012\015\040-\176\240-\377]+)//g;
		return $_;
	}
}

# The URI to get listings for a given channel.
sub dstv_channel_uri( $ ) {
	my $ch = shift;
	my $mapped = dstv_channel_map($ch);
	die "cannot look up '$ch' in map" if not defined $mapped;
	die if not defined $use_days;
	my $days_param = $allowed_days{$use_days};
    return "http://guide.dstv.com/listing/default.aspx?drpChannels=$mapped&drpDays=$days_param";
}

# Returns the option ID on the DSTV site for a given channel name
sub dstv_channel_map ($) {
	my $ch = shift;
	if (!%dstvchannelmap) {
		get_dstv_channel_mappings();
	}
	return $dstvchannelmap{$ch};
}

sub mnet_channel_map ($) {
	my $ch = shift;
	if (!%mnetchannelmap) {
		get_mnet_channel_mappings();
	}
	return $mnetchannelmap{$ch};
}

sub post_process($$) {
	my $title = shift;
	my $desc = shift;

	my $r = undef;
	my $subtitle = undef;
        my $episode_num = undef;
	my $year = undef;
	my $actors = undef;
	my $director = undef;
	my $writers = undef;	   # Unused right now
	my $commentators = undef;  # Unused right now
	my $category = undef;
	my $subtitles = undef;
	my $dolby = undef;

	# Try to get full title from description if title seems cut off
	$title =~ s/(^\s+|\s+$)//g;
	if ($title =~ /\.\.\.$/ ) {
		my $temp = $title;
		$temp =~ s/.\.\.\.$//g;
		# Try get full title from description;
		if ($desc =~ /^'?(${temp}[^\.\?]+[^\'])'?[\.\?]\s+(.+)/i) {
			t "REMAPPING TITLE from $title to $1";
			$title = $1;
			$desc = $2;
			$title =~ s/(^\s+|\s+$)//g;
			$desc =~ s/(^\s+|\s+$)//g;
			t "New desc = $desc";
		}
	}

	if ($desc =~ /^'([^\.]+)'\.\s+(.+)/) {
		$subtitle = $1;
		$desc = $2;
		t "FOUND EPISODE TITLE: $subtitle";
		t "Title: $title";
		t "New desc = $desc";
		$category = "series";
	}

	if ($subtitle && $subtitle =~ /^S?(\d+)\/E?(\d+) - (.*)$/) {
		$episode_num = ($1-1) . "." . ($2-1) . ".0/1";
		$subtitle = $3;
		t "FOUND EPISODE NUMBER: $episode_num";
		$category = "series";
	}			

	if ($desc =~ /^Aka ([^\.]+)\. (.*)/) {
		$desc = $2;
		my $aka = $1;
		t "Aka found: $aka\n";
		# TODO - do something with the aka
	}
			
	if ($desc =~ /,? (HI|English) Subtitles\.?/) {
		$desc =~ s/,? (HI|English) Subtitles\.?//g;
		t "REMOVING Subtitles string";
		$subtitles = 1;
	}

	if ($desc =~ /,? DD\.?/) {
		$desc =~ s/,? DD\.?//g;
		t "REMOVING DD string";
		$dolby = 1;
	}

	if ($title =~ /^Press .i.$/) {
		$title = $subtitle;
		$subtitle = undef;
	}

	if ($desc =~ /(.*) \((\d{4})\)\s*([^\.]+)\.?\s*$/) {
		$year = $2;
		$director = $3;
		$desc = $1;
		t "desc = $desc\n";
		t "Year = $year\n";
		t "Director = $director\n";
	}
			
	if ($desc =~ /(.*) \((\d{4})\)\s*$/) {
		$desc = $1;
		$year = $2;
		t "desc = $desc\n";
		t "Year = $year\n";
	}
			
	if (defined $year && $desc =~ /(.*\.)\s+([^\.]+ [A-Z][^\.]+)\.\s*/) {
		$desc = $1;
		$actors = $2;
		if (defined $actors) {
			$actors =~ s/^\s+//g;
			$actors =~ s/\s+$//g;
			my @a = split(/,\s+/, $actors);
			$actors = [];
			foreach my $a (@a) {
				push @$actors, $a;
			}
		}
		$category = "movie";
	}

	# Trim whitespace from elements
	$title =~ s/(^\s+|\s+$)//g;
	$desc =~ s/(^\s+|\s+$)//g;
	$subtitle =~ s/(^\s+|\s+$)//g if $subtitle;

	$desc = "No description available" if ($desc eq "");

	$r->{title} = [[$title]];
	$r->{'sub-title'} = [[$subtitle]] if $subtitle;
	$r->{'episode-num'} = [[$episode_num, "xmltv_ns"]] if $episode_num;
	$r->{desc} = [[$desc]];
	$r->{category} = [[ $category, 'en' ]] if $category;
	$r->{'subtitles'} = [ { type => 'teletext' } ] if $subtitles;
	$r->{'audio'}->{"stereo"} = "dolby digital" if $dolby;
	# credits
	my %c;
	$c{director} = [ $director ] if $director;
	$c{actor} = $actors if $actors;
	$c{writer} = $writers if $writers;
	$c{commentator} = $commentators if $commentators;
	$c{director} = [ $director ] if $director;
	$r->{date} = $year if $year;

	$r->{credits} = \%c if %c;

	return $r;
}

sub process_dstv_html {
	my $chanid = shift;
	my $name = $channels{$chanid};
	
	my $now = time();
	my $data;
	my $tries = 0;

	# URI just for error reporting.
	my $uri = dstv_channel_uri $name;
	local $SIG{__DIE__} = sub { die "$uri:$_[0]\n" };
	$data = tidy(get_dstv($name));

	if ($data =~ /:error:(.*):/) {
		return $data;
	}



	# Get time mappings
#	get_dstv_time_mappings($data);

	# parse the page to a document object
	my $tree = HTML::TreeBuilder->new();
        $tree->utf8_mode(1);
	$tree->parse($data) or die "cannot parse content\n";
	$tree->eof;
	my ($prev_r, $r, $prev_time);
	
	my @array_ot;

	# Find the main table, and loop through all the table rows

	# Find the date headers on the page
	my @date_headers = $tree->look_down(_tag => "td", class => 'srch_date_chnl_head');
	my $offset_counter = 0;
	foreach my $td (@date_headers) {
        	$offset_counter++;
	        next if (($offset_counter <= $opt_offset) || ($offset_counter > $opt_days+1));
		my $date = $td->as_text();
		$date =~ s/^[^0-9]+//g;


		my $tr = $td->parent();
		while (($tr = $tr->right())) {
			last if !defined $tr;
		
			my $result = $tr;

            my @alternating = $result->look_down(
                sub {
                    lc($_[0]->attr('_tag')) eq 'td' and lc($_[0]->attr('class')) eq 'srch_rslt_alternating'
                    or lc($_[0]->attr('_tag')) eq 'td' and lc($_[0]->attr('class')) eq 'srch_rslt'
                }
            );

			my $temp;
            last if !defined $alternating[0];
            $temp = $alternating[0];
			my $time = $temp->look_down(_tag => 'b');
			die 'no <b> thing (for time) found' if not defined $time;
			$time = $time->as_text;
			$time =~ /^(\d)(\d):(\d)(\d)$/ or die "bad time '$time'";
			
            $temp = $alternating[1]->look_down(_tag => "a", name => qr/Bookmark/)->look_down(_tag => "b");
            my $title = $temp->as_text;

			my ($rating, $duration);
			my $tempstring = $temp->right->as_text();
            
			if (defined $tempstring) {
				$rating = $1 if $tempstring =~ /Rating: ?(.+?)\s+/;
				$duration = $1 if $tempstring =~ /Duration: ?([0-9:]+)/;
			}

			t "$title: $rating: $duration\n";

			my $desc = $alternating[2]->as_text();
			t "---\n$desc\n---\n";

			my $start = gen_start_time($date, $time, $now);

			my $r = post_process($title, $desc);

			$start = $start . " $TZ";

			if ($rating) { $rating =~ s/(^\s+|\s+$)//g; }
			else { $rating = "Family"; }

			$r->{rating} = [[$rating, "DSTV"]];
			$r->{start} = $start;
			$r->{channel} = "$chanid.dstv.com";

			if (defined $prev_r) {
				$prev_r->{stop} = $start;
				$writer->write_programme($prev_r);
			}

			$prev_time = $time;
			$prev_r = $r;
			if ($offset_counter > $opt_days) {
				$offset_counter++;
		                last;
			}
		}
	}
	$data = "success";
	$tree->delete;
	return $data;
}


sub process_mnet_html {
	my $chanid = shift;
	my $name = $channels{$chanid};
	
	my $now = time();
	my $data;
	my $tries = 0;
	$data = tidy(get_mnet($name));

	if ($data =~ /:error:(.*):/) {
		return $data;
	}

	# parse the page to a document object
	my $tree = HTML::TreeBuilder->new();
        $tree->utf8_mode(1);
	$tree->parse($data) or die "cannot parse get_mnet data for $name\n";
	$tree->eof;
	my ($prev_r, $r, $prev_time);
	
   	my @array_ot;

	# Find the date headers on the page

	my @tags = $tree->look_down(
	  sub {
		# the lcs are to fold case
		lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'scheduledate' 
		or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'date' 
		or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'scheduletime' 
		or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'time' 
		or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'scheduletitle' 
		or lc($_[0]->attr('_tag')) eq 'font' and lc($_[0]->attr('class')) eq 'title' 
		or lc($_[0]->attr('_tag')) eq 'p'
		or lc($_[0]->attr('_tag')) eq 'a'
	  }
	);


	my ($date,$time,$title,$rating);
	my $days_done = 0;
	foreach my $tag (@tags) {
		my $tag_text = $tag->as_text;
		if ($tag->attr('class') && lc($tag->attr('class')) =~ /date$/ ) {
		# Date
			$tag_text =~ s/\240/ /g;
			$tag_text =~ /(\d+) (.*) (\d{4})/;
			$date = "$1 $2";
			$days_done++;
			next;
		}
		if ($tag->attr('class') && lc($tag->attr('class')) =~ /time$/) {
		# Time
			$tag_text =~ /^\s?(\d\d:\d\d)$/;
			$time = "$1";
            
			next;
		}
		if ($tag->attr('class') && lc($tag->attr('class')) =~ /title$/) {
		# Title
			$tag_text =~ s/[\302\240]//g;
			$title = $tag_text;
			next;
		}

		if ($tag->attr('_tag') && $tag->attr('_tag') eq 'a' &&
			$tag->attr('onclick') && $tag->attr('onclick') =~ /OpenAgeRestriction/) {
		# Rating
			$rating = $tag_text;
			next;
		}

		if ($tag->attr('_tag') && $tag->attr('_tag') eq 'p' &&
			$date && $time && $title) {
		# Description

			my $desc = $tag_text;
			$desc =~ s/(^\s+|\s+$)//g;
			t "---\n$desc\n---\n";

			my $start = gen_start_time($date, $time, $now);
			
			my $r = post_process($title, $desc);

			$start = $start . " $TZ";

			if ($rating) { $rating =~ s/(^\s+|\s+$)//g; }
			else { $rating = "Family"; }

			$r->{rating} = [[$rating, "DSTV"]];
			$r->{start} = $start;
			$r->{channel} = "$chanid.dstv.com";

			if (defined $prev_r) {
				$prev_r->{stop} = $start;
				$writer->write_programme($prev_r);
			}
			$prev_time = $time;
			$prev_r = $r;
			undef $title;
			undef $time;
			if ($days_done > ($opt_days-$opt_offset)) { last; }
		}
	}
	
	$data = "success";
	$tree->delete;
	return $data;
}



####
# process_table: fetch a URL and process it
#
# arguments:
#	Date::Manip object giving the day to grab
#	xmltv id of channel
#	katso id of channel
#
# returns: list of programme hashes to write
#
sub process_html {
	my $id = shift;
	my $name = $channels{$id};
	
	t "Getting Channel $id";
	
	my $result;
	
	my @order = ('dstv');
#	my @order = ('mnet', 'dstv');
#	if ($opt_mnet_fallback) {
#		@order = ('dstv', 'mnet');
#	}
	my %processfuncs = (
		'dstv' => \&process_dstv_html,
#		'mnet' => \&process_mnet_html,
		);
	
	foreach my $site (@order) {
		my $func = $processfuncs{$site};
		$result = &{$func}($id);
		t "result: $result";
		if ($result !~ /^:error:(.*):/) {
			return;
		}
		say("\nSite $site returned no data - attempting next site for $name");
	}
	say("\nskipping channel '$name'. All sites failed");
}




# get channel listing
sub get_channels {

	my $channels = shift;
	
	my @urls = (
#		'Google' , 'http://www.google.com/search?q=cache%3Awww.dstv.com%2Fmain.aspx%3FID%3D136',
#		'DSTV'   , 'http://www.dstv.com/dstvsa/content/en/sa/dstv_premium?categorylistsearch=cl_results&category_id=158', 
# vmlf - Added new link for channel list of premium bouquet
# 		 There are other DSTV bouquets available, each one corresponds to a different bId
		'DSTV'   , 'http://www.dstv.com/dstvsa/content/en/sa/products?bId=1', 
	);
	
	my $local_data;
	my $i=0;
	my $bar;
	for ($i=0; $i < $#urls; $i+=2) {
		my $key = $urls[$i];
		my $url = $urls[$i+1];
		$bar = new XMLTV::ProgressBar("Getting list of channels from $key site", 1) if not $opt_quiet;
		t "Getting $key from $url";
		
		$local_data = get_url('GET', $url);
		if (!defined $local_data || $local_data =~ /^:error/) {
			$bar->finish() if not $opt_quiet;		
			print STDERR "Unable to get channel listing from $key site\n"
				if not $opt_quiet;
			next;
		}
		last;
	}
	if (!defined $local_data || $local_data =~ /^:error/) {
		print STDERR "Unable to get channel listing from any site\n.  Please check your connectivity or try again later\n"
			if not $opt_quiet;
		die;
	}
	
	t "Got channel data ".length($local_data)." bytes - about to parse";

	my $tree = HTML::TreeBuilder->new();
        $tree->utf8_mode(1);
	$tree->parse($local_data) or die "cannot parse content of channels page\n";
	$tree->eof;

# vmlf - DSTV NEW channel list page url includes all types of channel, 
#		so we need filter channels that are inside the videoChannels div
#		to get the tv channels only
	my @list = $tree->look_down(
		_tag   => 'span',
		sub {
 			$_[0]->look_up(_tag => 'div', id => 'videoChannels') and 
 			$_[0]->look_up(_tag => 'div', id => 'channel_list') and 
 			id => 'header_back'
 		},
	);
	foreach my $entry (@list) {
		my $temp = $entry->right();
		
		$temp =~ /([\w\s&\+!-]+).*\|[^\d]+(\d+)$/;
		my $name = $1;
		my $chanid = $2;
		
		$name =~ s/\s+$//g;
		if (exists {map { $_ => 1 } @dstvignorechannels}->{$name}) {
			t "Ignore bogus channel $name";
		} else {
			t "Channel $chanid = $name";
			$channels->{$chanid} = $name;
		}
	}
	$tree->delete;
	die "no channels could be found" if not keys %$channels;
	update $bar if not $opt_quiet;
	$bar->finish() if not $opt_quiet;
}

# Bump a YYYYMMDD date by one.
sub nextday {
	my $d = shift;
	my $p = parse_date($d);
	my $n = DateCalc($p, '+ 1 day');
	return UnixDate($n, '%Q');
}

sub mode_configure {

	XMLTV::Config_file::check_no_overwrite($config_file);
	get_channels(\%channels);

	open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

	# Ask about each channel.
	my @chs = sort {uc($channels{$a}) cmp uc($channels{$b})} keys %channels;
	my @qs = map { "add channel '$channels{$_}'? " } @chs;
	my @want = ask_many_boolean(1, @qs);
	foreach (@chs) {
		my $w = shift @want;
		warn("cannot read input, stopping channel questions"), last
			if not defined $w;
		# Print a config line, but comment it out if channel not wanted.
		print CONF '#' if not $w;
		my $name = $channels{$_};
		print CONF "channel $_ $name\n";
	}

	my @choices = (1,7,14);
	my $days = ask_choice("Number of days to retrieve",$choices[2], @choices);
	print CONF "option days $days\n";

	my $retries = ask("Number of retries for failed downloads? (3)");
	$retries = 3 if $retries eq "";
	print CONF "option retries $retries\n";

	my $timeout = ask("Timeout for requests? (240)");
	$timeout = 240 if $timeout eq "";
	print CONF "option timeout $timeout\n";

#	say ("This grabber can get the listings from either mnet.co.za, or dstv.com");
#	say ("Which site would you like to use as the main site (mnet recommended)");
#	@choices = ('dstv','mnet');
#	my $fallback_option = ask_choice("Select one of: ",$choices[1], @choices);
#	if ($fallback_option eq 'dstv') {
#		print CONF "option mnet-fallback 1\n";
#	} else {
#		print CONF "option dstv-fallback 1\n";
#	}
	close CONF or warn "cannot close $config_file: $!";
	say("Finished configuration. ");

	exit();
}

sub update_dstv_eventstate {
# update form state attributes
	my $data = shift;

	if ($data =~ /id=\"__VIEWSTATE\" value=\"(.*)\"/) {
		$viewstate = $1;
		t "got viewstate: $viewstate";
	} else {
		print STDERR  "VIEWSTATE not found\n" if not $opt_quiet;
	}

	if ($data =~ /id=\"__EVENTVALIDATION\" value=\"(.*)\"/) {
		$eventvalidation = $1;
		t "got eventvalidation: $eventvalidation";
	} else {
		print STDERR  "EVENTVALIDATION not found\n" if not $opt_quiet;
	}
}

# Initialize cookies and retrieve current channel ID's
sub get_dstv_channel_mappings {
	t "refreshing dstv channel mappings";
	
	my $url = "http://guide.dstv.com/listing/default.aspx";
	my $data = get_url("GET", $url);

	if ($data =~ /^:error:/) {
		print STDERR  "Error getting dstv channel state data: $data\n"
			if not $opt_quiet;
		return;
	}

	update_dstv_eventstate($data);

	my %info = (
		'__VIEWSTATE' => $viewstate,
		'drpBouquet' => '1',
		'drpChannels' => '0',
		'drpDays' => '0', 
		'txtKeyword' => 'Keyword...', 
		'__EVENTVALIDATION' => $eventvalidation,
		'__EVENTTARGET' => 'drpBouquet',
		'__EVENTARGUMENT' => '',
		'__LASTFOCUS' => '', 
	);

 	$data = get_url("POST", $url, $url, undef, \%info);

        if ($data =~ /^:error:/) {
                print STDERR  "Error Getting dstv channel mappings: $data\n"
                        if not $opt_quiet;
                return;
        }

	update_dstv_eventstate($data);

    my $chantree = HTML::TreeBuilder->new();
	$chantree->utf8_mode(1);
	$chantree->parse($data) or die "cannot parse content of channels page\n";
	$chantree->eof;

    my $channame;
    my $chanid;
    my $chanselect = $chantree->look_down(_tag => 'select', name => 'drpChannels');
    my @chan_list = $chanselect->look_down(_tag => "option");
    foreach my $chanentry (@chan_list) {
        $chanid = $chanentry->attr('value');
        $channame = $chanentry->as_text;
        $dstvchannelmap{$channame} = $chanid;
	if ($dstvchannelfixups{$channame}) {
	        $dstvchannelmap{$dstvchannelfixups{$channame}} = $chanid;
	}
	
        t "Found channel $channame; internal reference $chanid";
    }

	$chantree->delete;

	t "Refresh successful";
	die "no channels could be found" if not keys %dstvchannelmap;
}

# Initialize cookies and retrieve current channel ID's
sub get_mnet_channel_mappings {

	t "refreshing mnet channel mappings";

	my $url = 'http://www.mnet.co.za/schedules/default.asp';
	my $result = get_url("GET", $url);
	if ($result =~ /^:error:/) {
		if ($result =~ /^:error:no data:(.+)$/s) {
			$result = $1;
		} else {
			print STDERR  "Error Getting mnet channel mappings: $result\n"
				if not $opt_quiet;
			return;
		}
	}	

	my $chantree = HTML::TreeBuilder->new();
        $chantree->utf8_mode(1);
	$chantree->parse($result) or die "cannot parse content of $url\n";
	$chantree->eof;

	my $chanselect = $chantree->look_down(_tag => 'select', name => 'channelid');
  	my @chan_list = $chanselect->look_down(_tag => "option");
	foreach my $chanentry (@chan_list) {
		my $chantemp = $chanentry->as_text;
        my $newchan;
		$chantemp =~ s/(^\s+|\s+$)//g;
		if ($chanentry->attr('value') =~ /^[\d\(\)]+$/) {		
			foreach my $fixup (keys %mnetchannelfixups) {
				if ($fixup eq $chantemp) {
					$newchan = $mnetchannelfixups{$fixup};
					t "fixing up $chantemp to $newchan";
				}
            }
            $newchan = $chantemp if not defined $newchan;
    		$mnetchannelmap{$newchan} = $chanentry->attr('value');
		}
	}
	$chantree->delete;
}

sub get_dstv_time_mappings() {
	my $data = shift;
	my ($res,$req);

	my $tree = HTML::TreeBuilder->new();
        $tree->utf8_mode(1);
	$tree->parse($data) or die "cannot parse dstv time mappings\n";
	$tree->eof;

	my @tags = $tree->look_down(
	  sub {
		# the lcs are to fold case
		lc($_[0]->attr('_tag')) eq 'img' and lc($_[0]->attr('src')) =~ /^get\.aspx\?guid/
	  }
	);

	our %dstvtimehashes = ();
	foreach my $tag (@tags) {
		my $temptag = $tag->attr('src');
		$tag->attr('src') =~ /^get\.aspx\?GUID=(.*)$/;
		my $guid = $1;
		if (not defined $dstvtimehashes{$guid}) {
			# unique guid - get data
			my $url = "http://www.dstv.com/DStv_Guide/get.aspx?GUID=$1";
			t "getting time mapping for GUID: $1";
			$req = GET $url;
			$req->header('Accept-Encoding','gzip');
			$req->header('Referer','http://www.dstv.com/DStv_Guide/default.aspx');
			$res = $ua->request($req);
			if ($res->is_success) {
				if (($res->headers()->header('Content-Encoding')) && 
					($res->headers()->header('Content-Encoding') eq 'gzip')) {
						$res->content(Compress::Zlib::memGunzip($res->content));
				}
				# hash
				my $imagehash = md5_hex($res->content);
				# compare
				if (defined $dstvfilehashes{$imagehash}) {
					$dstvtimehashes{$guid} = $dstvfilehashes{$imagehash};
				} else {
					if (! -f "$guid.gif") {
						if (not $opt_quiet) {
							print STDERR "Undefined image mapping for GUID=$guid\n";
							print STDERR "MD5 = $imagehash\n";
							print STDERR "Saving to file $guid.gif\n";
						}
					   if (!open GIF, ">$guid.gif") {
						   print STDERR "Cannot write file: $!\n"
						   	if not $opt_quiet;
						   next;
					   }
					   print GIF $res->content;
					   close GIF;
					}
				}
			  
			}
		}
	}

	$tree->delete;

}

# Download listings for a channel name - refresh mappings if necessary
sub get_dstv() {
	my $channame = shift;
    my $url = "http://guide.dstv.com/listing/default.aspx";
	
	my $mapped = dstv_channel_map($channame);
	die "cannot look up '$channame' in map" if not defined $mapped;
	die if not defined $use_days;
	my $days_param = $allowed_days{$use_days};

	my %info = (
        '__VIEWSTATE' => $viewstate,
		'drpBouquet' => '1',
		'drpChannels' => $mapped,
		'drpDays' => $days_param, 
		'txtKeyword' => 'Keyword...', 
		'btnSubmit.x' => '16', 
		'btnSubmit.y' => '15', 
		'__EVENTVALIDATION' => $eventvalidation, 
                '__EVENTTARGET' => '',
                '__EVENTARGUMENT' => '',
                '__LASTFOCUS' => '',
	);
	t "getting channel: $channame (ID = $mapped)";

 	my $result = get_url("POST", $url, $url, undef, \%info);

	if ($result =~ /^:error:/) {
		# Always attempt a refresh of channel mappings once
		get_dstv_channel_mappings();
    	$mapped = dstv_channel_map($channame);
    	die "cannot look up '$channame' in map" if not defined $mapped;
	    %info = (
            '__VIEWSTATE' => $viewstate,
    		'drpChannels' => $mapped,
    		'drpDays' => $days_param, 
    		'txtKeyword' => '', 
    		'btnSubmit.x' => '15', 
    		'btnSubmit.y' => '12', 
    		'__EVENTVALIDATION' => $eventvalidation, 
    	);
     	$result = get_url("POST", $url, $url, undef, \%info);
	}

	return $result;
}



sub get_mnet() {
	my $channame = shift;
	my $data;
	my $tries = 0;
	my $req;
	my $res;
 
	my $chanid = mnet_channel_map($channame); 
	if (not defined $chanid) {
	    my $msg = "no corresponding mnet channel found for $channame";
	    print STDERR "\n$msg" if not $opt_quiet;
	    # This seems to be the convention for returning errors.
	    return ":error:$msg:";
	}

	my $start_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_offset)*86400)));
    my $end_date;
    if ($opt_days < max keys %allowed_days) {
    	$end_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_days)*86400)));
    } else {
    	$end_date = POSIX::strftime("%Y/%m/%d", gmtime(time()+(($opt_days-1)*86400)));
    }    
	my %info = (
		'startDate' => $start_date,
		'EndDate' => $end_date, 
		'sType' => '5', 
		'channelid' => $chanid, 
		'searchstring' => '', 
		'channel' => $chanid, 
		'theType' => 'today', 
		'firstRun' => 'false', 
	);
	t "getting channel: $channame (ID = $chanid)";
 	$data = get_url("POST", "http://www.mnet.co.za/schedules/default.asp", 'http://www.mnet.co.za/schedules/default.asp', undef, \%info);
	return $data;
}


sub init_cookies {
	get_nice('http://guide.dstv.com/listing/default.aspx');
	my $bar = new XMLTV::ProgressBar('Initialising cookies', 1)
	  if not $opt_quiet;
	update $bar if not $opt_quiet;
	$bar->finish() if not $opt_quiet;
}

sub gen_start_time {
	my ($date, $time, $now) = @_;
	
	# Date = 'Friday 23 May'
	# Time = '14:00';
	# str2time sometimes gets the wrong year
	# Append the current year to the date
	# If we are in Nov or Dec, reading for Jan or Feb, year++
	$date =~ s/^(Today|Tomorrow|Tommorrow|Tommorow|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)//g;
	my $year = (gmtime($now))[5] + 1900;
	my $mon = (gmtime($now))[4] + 1;
	if (($mon == 11 || $mon == 12) && ($date =~ /(January|February)/)) {
		$year++;
	}
	my $timestamp = UnixDate("$date $year $time", "%s");
#	my $timestamp = str2time("$date $year $time");
	if (!defined $timestamp) {
		print STDERR "Error: Cannot decode time: $date $year $time\n";
	}
	my $rv = POSIX::strftime("%Y%m%d%H%M%S", gmtime($timestamp));
	return $rv;
}




sub initialise_ua {
	my $cookies = HTTP::Cookies->new;
	#my $ua = LWP::UserAgent->new(keep_alive => 1);
	my $ua = LWP::UserAgent->new;
	# Cookies
	$ua->cookie_jar($cookies);
	# Define user agent type
	$ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US)');
	# Define timouts
	$ua->timeout(240);
	# Use proxy if set in http_proxy etc.
	$ua->env_proxy;
	
	return $ua;
}



sub read_config {
    my $channels = shift;
    my $options = shift;
	
    my @config_lines = XMLTV::Config_file::read_lines($config_file);
	
	
    # Read configuration.
    my $line_num = 1;
    foreach (@config_lines) {
	++ $line_num;
	next if not defined;
	s/#.*//g;
	next if /^\s+$/;
	s/\s+$//g;
	if (/^channel/) {
	    my (undef, $chanid, $name) = split(/\s+/, $_, 3);
	    $channels->{$chanid} = $name;
	}
	if (/^option/) {
	    my (undef, $conf_option, $conf_value) = split(/\s+/, $_, 3);
			
#	    $opt_mnet_fallback = 1 if $conf_option eq 'mnet-fallback';
#	    $opt_dstv_fallback = 1 if $conf_option eq 'dstv-fallback';
	    $opt_retries = $conf_value if $conf_option eq 'retries';
	    $ua->timeout($conf_value) if $conf_option eq 'timeout';
			
	    if ($conf_option eq 'days') {
		if (defined $opt_days) {
		    # Day stuff was given on the command line.  This
		    # should override whatever's in the config file.
		    #
		} else {
		    # Set the number of days from the config file.  It
		    # must be one of the numbers allowed by the site.
		    $opt_days = $use_days = $conf_value;
		    die "bad number of days $use_days in config file\n"
		      if not grep { $_ == $use_days } keys %allowed_days;
		}
	    }
	}
    }
#    die 'config file: --mnet-fallback and --dstv-fallback are mutually exclusive' 
#      if (defined $opt_mnet_fallback && $opt_dstv_fallback);
}

sub get_url($$$$$) {

	my $method = shift;
	my $url = shift;
	my $referrer = shift;
	my $agent = shift;
	my $varhash = shift;
	

	t "Downloading URL: $url";
	my $req = GET "$url";
	$req->header('Accept-Encoding','gzip');
	$req->header('Referer',$referrer) if defined $referrer;
	$req->agent($agent) if defined $agent;

	my $tries = 0;
	my $data;
    my $offset_counter = 0;
	while ($tries < $opt_retries && not defined $data) {
		$tries++;
		my $res;
		
		t "Attempt $tries";
		
		if (lc($method) eq 'post') {
			$res = $ua->post($url, $varhash);
		} else {
			$res = $ua->request(GET "$url");
		}
		if ($res->is_success) {
			if (($res->headers()->header('Content-Encoding')) && 
			 ($res->headers()->header('Content-Encoding') eq 'gzip')) {
				$res->content(Compress::Zlib::memGunzip($res->content));
			}
			if (! $res->content =~ /class="srch_rslt_head1"/) {
				t "No listing data found";
				$data = ":error:no data:" . $res->content;
			} else {
				$data = $res->content;
			}
		} else {
			print STDERR "\nserver error: " . $res->status_line
				if not $opt_quiet;
			t "Failed"
		}
	}
	if (not $data) {$data = ":error:maximum retries:"};   
	return $data;
}
