#! /usr/local/bin/perl # Copyright (C) 2007-2008 Lars Eggert # All rights reserved. # # Redistribution and use in source and binary forms are permitted # provided that the above copyright notice and this paragraph are # duplicated in all such forms and that any documentation, # advertising materials, and other materials related to such # distribution and use acknowledge that the software was developed # by Lars Eggert. The name of the author may not be used to endorse # or promote products derived from this software without specific # prior written permission. # # THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR # IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE. # $Id: important-dates.pl,v 1.5 2008/10/27 05:58:03 eggert Exp $ use warnings; use strict; use Getopt::Long; use FindBin; use WWW::Mechanize; use Data::Ical; use Data::ICal::Entry::Event; use Date::ICal; use Date::Parse; use DateTime; use HTML::TokeParser::Simple; # process options my %opt = ( v => 0 ); if (GetOptions(\%opt, "i|ietf=s", "v|verbose+", "h|help") == 0 or $opt{h} or not exists $opt{i}) { print < Where: -help print help text and exit -verbose be more verbose -ietf number of the IETF meeting to process STOP exit; } sub cleanup ($) { my $text = shift; # remove crap from web page dump $text =~ s/[\t \xA0]+/ /g if $text; $text =~ s/^[\t ]+//g if $text; $text =~ s/[\t ]+$//g if $text; $text =~ s/[\n\r]+/\n/g if $text; $text =~ s/^[\r\n]//g if $text; $text =~ s/[\t ]+/ /g if $text; chomp $text; return $text; } # get the page LWP::Debug::level('+') if $opt{v} > 1; my $url = "http://www.ietf.org/meetings/$opt{i}/$opt{i}-cutoff_dates.html"; my $mech = WWW::Mechanize->new(env_proxy => 1); $mech->proxy('https', undef); $mech->get($url); my $page = $mech->content(); print STDERR $page if $opt{v} > 1; # make a calendar my $cal = Data::ICal->new(); # make a parser and parse my $months = "(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)"; my $p = HTML::TokeParser::Simple->new(string => $page); while (my $token = $p->get_token) { next unless $token->is_text; my $txt = cleanup $token->as_is; next unless $txt =~ /^$months/; $txt =~ /^([a-z]+\s+\d+(,\s+\d+)?).*\s+-\s+(.*)/i; my ($date, $descr) = ($1, $3); my $etime = $1; if ($descr) { $descr =~ /.*\(([0-9]+.*)\).*/; # if there is no match this will be set to $1 from above $etime = $1; } # skip and warn if we're missing data unless ($date and $descr) { print STDERR "$txt\n$date\n$descr\n" if $opt{v}; next; } # make summary more readable (and shorter) $descr = lc $descr; $descr =~ s/\s+ads\s+/ ADs /ig; $descr =~ s/cut-off/cutoff/ig; $descr =~ s/date //ig; $descr =~ s/working group/WG/ig; $descr =~ s/area director/AD/ig; $descr =~ s/BOF requests/BOFs/ig; $descr =~ s/internet draft/ID/ig; $descr =~ s/bof/BOF/ig; $descr =~ s/(\(.*\))//ig; $descr =~ s/for comment.*//ig; $descr =~ s/(((due )?by|at|appreciated|, upload)\s.*)$//ig; $descr =~ s/to be published.*//ig; $descr =~ s/for requests to//ig; $descr =~ s/and/\&/ig; $descr =~ s/initial document.*/-00/ig; $descr =~ s/(.*)\.\s+.*/$1/ig; $descr =~ s/(.*) \d+:\d+.*/$1/; $descr =~ s/[\t ]+/ /ig; $descr = "IETF-$opt{i} $descr"; # if the item has a timestamp, use that, otherwise construct one my $year = DateTime->now->year; $etime =~ s/[a-z]+day,?//i; $etime =~ s/24:00/00:00/; $etime =~ s/\/GMT//; $etime = "$date, $etime" unless $etime =~ /$months/i; $etime .= " 01:00 UTC" unless $etime =~ /\d+:\d+/; $etime .= " $year" unless $etime =~ /20\d+/; my $when = str2time($etime); die "could not parse event date: $etime" unless $when; # add to calendar (US/Eastern has GMT offset -0600) my $t = Date::ICal->new(epoch => $when); my $tstr = "TZID=US/Eastern:" . $t->ical(offset => "-0600"); my $evt = Data::ICal::Entry::Event->new(); $evt->add_properties( summary => $descr, description => $txt, url => $url, dtstart => $tstr, dtend => $tstr, created => Date::ICal->new(epoch => time)->ical(), ); $cal->add_entry($evt); } my $caltext = $cal->as_string; $caltext =~ s/(DT(START|END)):TZID(.*)/$1;TZID$3/gm; print $caltext;