#!/usr/bin/perl

#parses a paste of BC transit's schedule listing into links for http://skeena.net/bus/
#usage:
# ./parse_transit.pl schedule_paste.txt > links_snippet.html
#
#schedule_paste.txt should have lines that look like:
#1 RICHARDSON / DOWNTOWN
#2/2A OAK BAY / WILLOWS / DOWNTOWN
#3 GONZALES / BEACON HILL / DOWNTOWN
#...
#(it's OK if the web browser put blank lines in between, or if BC transit
#included a link twice; they'll be sorted out.)


use strict;
use warnings;

#Turns one schedule line, e.g. "2/2A OAK BAY / WILLOWS / DOWNTOWN", into its
#anchor tags: one tag per route-number variant per route name.
#  $line   - one line of the paste: a route number field, whitespace, then
#            the route names separated by " / "
#  returns - list of "<a name=...>...</a>" strings (no trailing newline);
#            empty list for blank lines and lines that have no route names
sub route_anchors {
	my ($line) = @_;

	#route number is the first field; everything after it is the route names.
	#split ' ' skips leading whitespace and accepts any run of whitespace.
	my ($num_field, $names_field) = split ' ', $line, 2;
	return unless defined $names_field;

	#drop trailing whitespace (e.g. a stray \r from a DOS-format paste)
	$names_field =~ s/\s+$//;
	return unless length $names_field;

	my @names = split /\s+\/\s+/, $names_field;

	#special handling: a '/' in the route number (so far BC transit only does
	#this for the 2/2A route) means several routes share these names; emit
	#the tags for each variant in the order listed.
	my @anchors;
	foreach my $route_num (grep { length } split /\//, $num_field) {
		#more special handling: if the route number contains non-numeric
		#chars, BC transit's link to the schedule page does not include them,
		#so strip them out (e.g. the 2A's URL looks like "...route=2:0...")
		(my $link_route_num = $route_num) =~ s/[^0-9]//g;

		#the first route name is direction 0; all the others are direction 1
		my $direction = 0;
		foreach my $name (@names) {
			push @anchors,
				"<a name='r${link_route_num}_${direction}'>$route_num $name</a>";
			$direction = 1;
		}
	}
	return @anchors;
}

my $schedule_paste = shift @ARGV;
defined $schedule_paste
	or die "usage: $0 schedule_paste.txt > links_snippet.html\n";

#Let sort(1) order the lines by route number and drop repeats. With -n, -u
#keeps a single line per leading number, so all blank lines collapse too.
#The list form of open runs sort directly, with no shell in between, so any
#character in the file name is safe; '--' keeps a leading '-' in the name
#from being read as an option.
open my $sorted_fh, '-|', 'sort', '-un', '--', $schedule_paste
	or die "can't run sort on '$schedule_paste': $!\n";
chomp(my @lines = <$sorted_fh>);
close $sorted_fh
	or die "sort failed on '$schedule_paste' (exit status ".($? >> 8).")\n";

#every line is processed, including the last one; blank lines produce no
#output, so they need no special removal here
foreach my $line (@lines) {
	print "$_\n" foreach route_anchors($line);
}
