#! /usr/local/bin/perl # Copyright (C) 2007-2008 Lars Eggert # All rights reserved. # # Redistribution and use in source and binary forms are permitted # provided that the above copyright notice and this paragraph are # duplicated in all such forms and that any documentation, # advertising materials, and other materials related to such # distribution and use acknowledge that the software was developed # by Lars Eggert. The name of the author may not be used to endorse # or promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. # $Id: checkmytrip.pl,v 1.21 2008/05/13 07:46:17 eggert Exp $ # to-do: check multi-type reservations use warnings; use strict; use Getopt::Long; use FindBin; use HTML::TokeParser::Simple; use WWW::Mechanize; use File::Slurp; use Data::Dumper; use Data::Ical; use Data::ICal::Entry::Event; use Date::ICal; use Date::Parse; use DateTime; use Locale::Country; # Process options my %opt = ( ); if (GetOptions(\%opt, "n|name=s", "r|reference=s", "v|verbose+", "h|help", "s|source=s") == 0 or $opt{h} or not exists $opt{n} or not exists $opt{r}) { print <] [-reference ] Where: -help print help text and exit -verbose be more verbose -name last name of the passenger -reference AMADEUS booking reference code -s read page from given file rather than HTTP STOP exit; } $opt{n} = uc $opt{n}; $opt{r} = uc $opt{r}; LWP::Debug::level('+') if $opt{v} and $opt{v} > 2; my $blurb = "Extracted by $FindBin::Script, ". '$Revision: 1.21 $. '. "Email bug reports, including the HTML of the checkmytrip.com page, ". "to lars.eggert\@gmx.net."; sub dlog ($) { my $line = shift; printf STDERR "$line\n" if $opt{v}; } sub cleanup ($) { my $text = shift; # remove crap from web page dump $text =~ s/[\t \xA0]+/ /g if $text; $text =~ s/^[\t ]+//g if $text; $text =~ s/[\t ]+$//g if $text; $text =~ s/[\n\r]+/\n/g if $text; $text =~ s/^[\r\n]//g if $text; # fix some weird stuff from checkmytrip $text =~ s/AIRPORT/Airport/gi if $text; $text =~ s|kilogram\(s\) per |kg/|g if $text; $text =~ s|information not available|N/A|g if $text; $text =~ s/terminal /T/gi if $text; $text =~ s/traveller/traveler/g if $text; $text =~ s/Fare type/Fare/g if $text; $text =~ s/check in/check-in/gi if $text; $text =~ s/Airbus Industrie/Airbus/gi if $text; $text =~ s/economy/Economy/gi if $text; chomp $text; return $text; } sub icaldate ($;$$$$) { my ($date, $time, $offset, $tzoffset, $country) = @_; $offset = 0 unless defined $offset; $time = "00:00" unless defined $time; if ($country and $tzoffset) { # make a time in the correct timezone my $code = country2code $country; my %tzid; # hack: assumes Jan 1 is never in DST ever my $dt = DateTime->new(month => 1, day => 1, year => 2008); # sort the timezones by ascending string length, such that # shorter ones get picked preferentially - this hopefully # helps avoid an issue where Exchange gets confused by # "interesting" timezones such as "America/Indiana/Vincennes" my @zones = DateTime::TimeZone->names_in_country($code); @zones = sort { length $a <=> length $b } @zones; foreach my $tzname (@zones) { my $tz = DateTime::TimeZone->new(name => $tzname); my $offset = $tz->offset_for_datetime($dt); my $offsetstr = DateTime::TimeZone->offset_as_string($offset); $tzid{$offsetstr} = $tzname unless exists $tzid{$offsetstr}; } my $tid = $tzid{$tzoffset}; # construct ical timestamp my $t = Date::ICal->new(epoch => (str2time("$date $time $tzoffset") + $offset * 86400)); my $tstr = $t->ical(offset => $tzoffset); dlog "$date $time $offset $tzoffset $country ". "$code TZID=$tid:$tstr"; return "TZID=$tid:$tstr"; } else { # make a floating time my $t = Date::ICal->new(epoch => (str2time("$date $time") + $offset * 86400)); my $tstr = $t->ical(localtime => 1); dlog "$date $time $offset TZID=$tstr"; return $tstr; } } sub tzoffset($) { my $code = uc shift; # hacks for known incorrect information on world-airport-codes.com # (seems to take a while for the maintainers to approve corrections) # return "-0900" if $code eq "ANC"; my $mech = WWW::Mechanize->new(env_proxy => 1); $mech->proxy('https', undef); $mech->get("http://www.world-airport-codes.com/search/". "?criteria=$code&searchWhat=airportcode"); my $text = $mech->content(); $text =~ /.*\s+([+-]\d{1,2}\.\d{1,2})\s+.*/; $text =~ /([+-])(\d+)\.(\d+)/; return sprintf "$1%02d%02d", $2, "0.$3"*60; } sub add_hotel ($$$$) { my ($p, $url, $cal, $bookref) = @_; $p->get_tag("span"); my $hotelref = cleanup $p->get_trimmed_text("/span"); foreach (1..2) { $p->get_tag("td"); } my $city = cleanup $p->get_trimmed_text("/td"); $p->get_tag("td"); my $hotel = cleanup $p->get_text("/td"); $p->get_tag("td"); my $addr = cleanup $p->get_text("/td"); $addr =~ s/\n/, /g; $p->get_tag("td"); my $phone = cleanup $p->get_text("/td"); foreach (1..4) { $p->get_tag("td"); } my $checkin = cleanup $p->get_trimmed_text("/td"); $checkin =~ s/(.*\d+)\s+\d+.*/$1/; $p->get_tag("td"); my $checkout = cleanup $p->get_trimmed_text("/td"); my $note =<new(); $evt->add_properties( summary => $hotel, description => $note, location => $city, url => $url, dtstart => icaldate("$checkin_date, $checkin_year", "0:00"), dtend => icaldate("$checkout_date, $checkout_year", "23:59:59"), created => Date::ICal->new(epoch => time)->ical(), ); $cal->add_entry($evt); } sub add_car ($$$$) { my ($p, $url, $cal, $bookref) = @_; $p->get_tag("span"); my $carref = cleanup $p->get_trimmed_text("/span"); foreach (1..2) { $p->get_tag("td"); } my $where = cleanup $p->get_trimmed_text("/td"); $p->get_tag("td"); my $car_rental = cleanup $p->get_trimmed_text("/td"); foreach (1..2) { $p->get_tag("td"); } my $car = cleanup $p->get_trimmed_text("/td"); $car =~ s/[,;]\s+/\n /g; foreach (1..3) { $p->get_tag("td"); } my $pickup_loc = cleanup $p->get_trimmed_text("/td"); $p->get_tag("td"); my $pickup_time = cleanup $p->get_trimmed_text("/td"); $p->get_tag("td"); my $dropoff_loc = cleanup $p->get_trimmed_text("/td"); $p->get_tag("td"); my $dropoff_time = cleanup $p->get_trimmed_text("/td"); $pickup_loc = $dropoff_loc unless $pickup_loc; $dropoff_loc = $pickup_loc unless $dropoff_loc; my $note =<new(); $evt->add_properties( summary => "$car_rental Car Rental", description => $note, location => $where, url => $url, dtstart => icaldate("$pickup_date, $pickup_year", $pickup_hour), dtend => icaldate("$dropoff_date, $dropoff_year", $dropoff_hour), created => Date::ICal->new(epoch => time)->ical(), ); $cal->add_entry($evt); } sub add_flight ($$$$$) { my ($p, $url, $cal, $bookref, $airref) = @_; $p->get_tag("td"); my $date = cleanup $p->get_trimmed_text("/td"); foreach (1..5) { $p->get_tag("td"); } my $dep_time = cleanup $p->get_trimmed_text("/td"); my $dep_offset = 0; if ($dep_time =~ /(.*) ([+-]\d+) day\(s\)/) { ($dep_time, $dep_offset) = ($1, $2); } $p->get_tag("td"); my $text = cleanup $p->get_trimmed_text("input"); $text =~ /(.*), (.*) -/; my ($dep_city, $dep_country) = ($1, $2); foreach (1..2) { $p->get_tag("input"); } my $dep_code = $p->get_tag("input")->get_attr("value"); $text = cleanup $p->get_trimmed_text("/td"); $text =~ /(.*) , (.*)/; my ($dep_airport, $dep_terminal) = ($1, $2); $dep_terminal =~ s/terminal /T/i; foreach (1..2) { $p->get_tag("td"); } my $arr_time = cleanup $p->get_trimmed_text("/td"); my $arr_offset = 0; if ($arr_time =~ /(.*) ([+-]\d+) day\(s\)/) { ($arr_time, $arr_offset) = ($1, $2); } $p->get_tag("td"); $text = cleanup $p->get_trimmed_text("input"); $text =~ /(.*), (.*) -/; my ($arr_city, $arr_country) = ($1, $2); foreach (1..2) { $p->get_tag("input"); } my $arr_code = $p->get_tag("input")->get_attr("value"); $text = cleanup $p->get_trimmed_text("/td"); $text =~ /(.*) , (.*)/; my ($arr_airport, $arr_terminal) = ($1, $2); my ($key, $val, %data); $p->get_tag("td"); do { $p->get_tag("td"); $key = cleanup $p->get_trimmed_text("/td"); $key =~ s/://g; $p->get_tag("td"); $val = cleanup $p->get_trimmed_text("/td"); $data{$key} = $val if ($key and $val); } until ($key and not $val); # final thing is always the meal, which doesn't have a key (currently) $data{Meal} = ucfirst lc $key; # dlog Dumper \%data; my $note =<new(); $evt->add_properties( summary => "$dep_code-$arr_code", description => $note, location => $data{Airline}, url => $url, dtstart => icaldate($date, $dep_time, $dep_offset, tzoffset $dep_code, $dep_country), dtend => icaldate($date, $arr_time, $arr_offset, tzoffset $arr_code, $arr_country), created => Date::ICal->new(epoch => time)->ical(), ); $cal->add_entry($evt); } my $page; my $url = "https://www.checkmytrip.net/CMTServlet?R=$opt{r}&L=GB&N=$opt{n}"; if ($opt{s}) { $page = read_file($opt{s}); } else { my $mech = WWW::Mechanize->new(env_proxy => 1); $mech->proxy('https', undef); $mech->get($url); $page = $mech->content(); } my $cal = Data::ICal->new(); my $p = HTML::TokeParser::Simple->new(string => $page); my ($tag, $bookref, $airref); do { $tag = $p->get_tag("td"); my $text = cleanup $p->get_trimmed_text("/td"); if ($text =~ /Booking reservation number:/) { $text =~ s/Booking reservation number:\s+//; $bookref = $text; } elsif ($text =~ /Airline confirmation number\(s\):/) { $text =~ s/Airline confirmation number\(s\):\s+//; $airref = $text; $airref =~ s/\s([A-Z0-9]+)\s/ $1, /g; } elsif ($text =~ /Flight \d+$/) { add_flight $p, $url, $cal, $bookref, $airref; } elsif ($text =~ /your car selection/) { add_car $p, $url, $cal, $bookref; } elsif ($text =~ /your hotel selection/) { add_hotel $p, $url, $cal, $bookref; } } while (defined $tag); my $caltext = $cal->as_string; $caltext =~ s/(DT(START|END)):TZID(.*)/$1;TZID$3/gm; print $caltext;