#! /bin/bash

# Copyright (C) 2007 Lars Eggert
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation,
# advertising materials, and other materials related to such
# distribution and use acknowledge that the software was developed
# by the author. The name of the author may not be used to endorse
# or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE.

# $Id: rsync-ietf,v 1.8 2008/07/14 07:49:45 eggert Exp $

# Mirrors IETF documents into three local directories using wget:
#   /Users/Shared/Misc./ids     - Internet-Drafts listed in all_id.txt
#   /Users/Shared/Misc./rfcs    - RFCs listed in rfc-index.txt
#   /Users/Shared/Misc./auth48  - .txt/.html files from the RFC Editor's
#                                 authors/ directory
# Takes no arguments. Requires wget, awk and grep, plus network access.

# Options shared by the bulk wget runs. Held in an array so every option
# reaches wget as its own word without depending on unquoted word splitting.
flags=(-nv -nd -N -T 30 --waitretry=1 --retry-connrefused)

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Download the URLs listed on stdin, then retry some of them from "www".
# The first wget's combined output is echoed to stderr by awk so it stays
# visible. Every output line that begins with "http:" has its first
# "tools" replaced by "www" and its last character removed, and the result
# is handed to a second wget.
# NOTE(review): presumably those "http:" lines are the "<url>:" lines that
# wget -nv prints for failed fetches, making this a retry against the www
# host - confirm against the wget version in use.
# Globals:   flags (read)
# Arguments: none; the URL list is read from stdin, one URL per line
# Outputs:   first-pass wget output on stderr; files in the current directory
# Returns:   exit status of the final wget
#######################################
fetch_with_fallback() {
  wget "${flags[@]}" -i- 2>&1 |
    awk '{print > "/dev/fd/2"}; /^http:/ { sub(/tools/, "www", $0); print substr($0, 1, length - 1)}' |
    wget "${flags[@]}" -i-
}

# Abort when a target directory is missing: otherwise the rm and the
# downloads below would run in whatever directory the script started in.
cd /Users/Shared/Misc./ids || die "cannot cd to /Users/Shared/Misc./ids"

# get some supplementary files first
for file in all_id.txt 1id-index.txt 1id-abstracts.txt; do
  # Remove the previous copy before fetching a fresh one; -f keeps a first
  # run (no old copy present yet) quiet.
  rm -f -- "$file"
  wget -nv "http://tools.ietf.org/internet-drafts/$file"
done

# Skip all_id.txt lines whose later columns contain "RFC" or "Expired",
# then turn every remaining "draft-..." name into a download URL.
grep -Ev ".+[[:space:]]+.+[[:space:]]+(RFC|Expired)" all_id.txt |
  awk '/^draft-/ {print "http://tools.ietf.org/internet-drafts/" $1 ".txt"}' |
  fetch_with_fallback

cd /Users/Shared/Misc./rfcs || die "cannot cd to /Users/Shared/Misc./rfcs"

# hacked up to not try to download old RFCs < 4900 all the time
# remove [4-9] from the pattern to turn off hack
# NOTE(review): the pattern only tests the first digit, so it keeps every
# index entry whose number starts with 4-9 rather than exactly >= 4900.
wget -nv -O- http://tools.ietf.org/rfc/rfc-index.txt |
  awk '/^[4-9][0-9]+ / {print "http://tools.ietf.org/rfc/rfc" $1 ".txt"}' |
  fetch_with_fallback

cd /Users/Shared/Misc./auth48 || die "cannot cd to /Users/Shared/Misc./auth48"

# Recursive fetch limited to one level and to .txt/.html files.
wget -r -l 1 -A txt,html "${flags[@]}" http://www.rfc-editor.org/authors/