#!/usr/local/bin/perl require 'jcode.pl'; use Time::Local; use HTTP::Date; use File::Copy 'cp'; use IO::Socket; use IO::Select; $VERSION = '0.1'; $USER_AGENT = "DAIZU/$VERSION"; $URL = 'http://www.hauN.org/~konishi/dize/'; $REMOTE_CFG_FILE = 'daizu.cfg'; # # $AGENT_NAME = 'AGENT_NAME'; $G_URL = 'GET_URL'; $R_URL = 'REFER_URL'; $AUTH_RE = 'AUTH_RE'; $CFG_FILE = 'daizu.cfg'; $ONE_HOUR = 60 * 60; $THIS_TIME = time(); $EXPIRE_MARGIN = $ONE_HOUR * 1.45 ; $THIS_YEAR = (localtime($THIS_TIME))[5]; # $CONNECT_TIMEOUT = 3; $READ_TIMEOUT = 10; # &jcode::init(); # ## read config &read_config_file; foreach my $file ( keys %$REMOTE_CFG ){ # print STDERR "FILE: $file\n"; next if &get_remote($file); &get_remote($file); &remote_data($file); } sub read_config_file{ my ($user_agent, $filename, $url, $refer, $re); open ( FILE ,$REMOTE_CFG_FILE); while (){ next if ( /^\s*$/ ); next if ( /^#/ ); $_ = &jcode::euc($_); chomp; ( $filename, $user_agent, $url, $refer, $re ) = split ("::"); $REMOTE_CFG->{$filename}->{$AGENT_NAME} = $user_agent; $REMOTE_CFG->{$filename}->{$G_URL} = $url; $REMOTE_CFG->{$filename}->{$R_URL} = $refer; $REMOTE_CFG->{$filename}->{$AUTH_RE} = $re; } close FILE; } sub remote_data{ my $file = shift; my $url = undef; my $time = 0; my $detected = 0; my $di_file = $file.".di"; my $fixed = ""; my $re = $REMOTE_CFG->{$file}->{$AUTH_RE}; if ( $REMOTE_CFG->{$file}->{$G_URL} =~ /^file:/i ){ my $f = (&parseURL($REMOTE_CFG->{$file}->{$G_URL}))[3]; $detected = (stat($f))[9]; } $fixed = "Authorized: $USER_AGENT\n"; $fixed .= "Authorized-Url: $URL\n"; # $fixed .= "Expires: ".HTTP::Date::time2str($THIS_TIME+$EXPIRE_MARGIN)."\n"; # $fixed .= "Expire: ".HTTP::Date::time2str($THIS_TIME+$EXPIRE_MARGIN)."\n"; $fixed .= "X-Original-Authorized: "; $fixed .= $REMOTE_CFG->{$file}->{$AGENT_NAME}."\n"; $fixed .= "X-Original-Authorized-url: "; $fixed .= $REMOTE_CFG->{$file}->{$R_URL}."\n"; open (FILE,$file); open (DI, "> $di_file" ); print DI "User-Agent: $USER_AGENT\n"; print DI "Date: ",HTTP::Date::time2str($THIS_TIME),"\n\n"; while (){ # EUCにする $_=&jcode::euc($_); # 改行コードを取り除く chomp; if ( /last-modified:\s*.*/i || m%\d+\s*時\s*\d+\s*分\s*更新% ){ $detected = lm2time($_); } next unless ( m%$re% ); if ( /\d+:\d+/){ if (m%href=\"([^\"]+)\"[^>]*>%i){ $url = $1; # URLの後のゴミを取り除く。 $url =~ s/[\?&]\d+(\#\w+)?$//; $url =~ s/\%7E/~/o; # 時刻を取り出す。 $time = &lm2time($_); if ( $url ne "" && $time ne ""){ # &set_info( $file, $url, $time, $detected ); print DI "Url: $url\n"; print DI "Last-Modified: "; print DI HTTP::Date::time2str( $time ),"\n"; print DI "Last-Modified-Detected: "; print DI HTTP::Date::time2str( $detected ),"\n"; print DI "Expires: "; print DI HTTP::Date::time2str($detected+$EXPIRE_MARGIN),"\n"; print DI "Expire: "; print DI HTTP::Date::time2str($detected+$EXPIRE_MARGIN),"\n"; print DI $fixed,"\n"; } } } } close FILE; close DI; } ### ## リモートファイルの取得。 # sub get_remote{ my $out_file = shift; my $method = 'get'; my $url = $REMOTE_CFG->{$out_file}->{$G_URL}; my $http_opt; my $scheme; my $host; my $port; my $userfile; my $status; my $remote; my $sel = IO::Select->new(); my @ready; my $this_time = time(); if ( $method =~ /head/i ){ $http_opt = "HEAD "; }else{ $http_opt = "GET "; } ( $scheme, $host, $port, $userfile ) = &parseURL( $url ); if ( $scheme eq 'file'){ if ( -e $userfile ){ if ( &file_time($userfile) > &file_time($out_file) ){ cp($userfile, $out_file ); return 0; } return 1; }else{ print STDERR "not exist ",$url," $!\n"; return 2; } }else{ #protcol is not 'file' if ( $HTTP_PROXY ne "" ){ ( $scheme, $host, $port, $userfile ) = &parseURL( $HTTP_PROXY ); $http_opt .= "$url HTTP/1.0\n"; # $http_opt .= "$url HTTP/1.1\n"; $http_opt .= "Host: ".$host."\n"; }else{ if ($scheme eq 'dirp' ){ $http_opt = "dirp/0.2\n"; }else{ $http_opt .= "$userfile HTTP/1.0\n"; # $http_opt .= "$userfile HTTP/1.1\n"; $http_opt .= "Host: $host\n"; } } $http_opt .= "User-Agent: $USER_AGENT\n"; $http_opt .= "Date: ".HTTP::Date::time2str($this_time)."\n"; $http_opt .= "\n" if ( $scheme eq 'dirp'); if ( $scheme eq 'dirp' ){ if ( -e $out_file ){ $http_opt .= "If-Detected-Modified-Since: "; $http_opt .= HTTP::Date::time2str( (stat($out_file))[9] ); $http_opt .= "\n"; } else { $http_opt .= "If-Detected-Modified-Since: "; $http_opt .= HTTP::Date::time2str( 0 ); $http_opt .= "\n"; } }else{ if ( -e $out_file ){ $http_opt .= "If-modified-since: "; $http_opt .= HTTP::Date::time2str( (stat($out_file))[9] ); $http_opt .= "\n"; } } if ( $scheme eq 'http'){ $http_opt .= "Connection: close\n"; } # print STDERR $scheme, $host, $port, $userfile,"\n"; # print STDERR $http_opt,"\n\n"; $remote = IO::Socket::INET->new( Proto => "tcp", PeerAddr => $host, PeerPort => $port, Timeout => $CONNECT_TIMEOUT ); if ($remote) { $sel->add($remote); $remote->autoflush(1); print $remote $http_opt,"\n"; @ready = $sel->can_read($READ_TIMEOUT); if ( @ready ){ foreach my $sock (@ready){ open FILE,">$out_file.$$"; binmode FILE; if ( $scheme eq 'http'){ $status = <$remote>; $status =~ s/\015\012/\012/og; $status =~ s/\015/\012/og; if ( $method =~ /head/i ){ print FILE "URL: ", $REMOTE_CFG->{$out_file}->{$R_URL},"\n"; print FILE "Last-Modified-Detected: "; print FILE HTTP::Date::time2str( $this_time ),"\n"; print FILE "Expire: "; print FILE HTTP::Date::time2str( $this_time + $ONE_HOUR),"\n"; print FILE "Expires: "; print FILE HTTP::Date::time2str( $this_time + $ONE_HOUR),"\n"; print FILE "Authorized: $USER_AGENT\n"; print FILE "Authorized-Url: $URL\n"; } # print FILE "Status: ", $status; } while(<$remote>){ s/\015\012/\012/og; s/\015/\012/og; print FILE; # print STDOUT; } close(FILE); close($remote); # print "Status: $status\n"; if ( $status =~ /200/ || $scheme eq 'dirp' ){ rename "$out_file.$$", $out_file; return 0; }else{ unlink "$out_file.$$"; return 1; } } }else{ print STDERR "read timeout ",$url,"\n"; close($remote); return 2; } }else{ print STDERR "cannot open ",$url," $!\n"; return 2; } } } sub parseURL{ local ($_) = shift; my $protcol = 'http'; my $port = 80; my $server; my $userfile; if ( m%^(\w+://)%o ){ s%^((\w+)://)%%; $protcol = $2; } unless ( $protcol eq 'file' ){ s%^([^/:]+)%%; $server = $1; if ( m%^(:(\d+))%o ){ s%^(:(\d+))%%o; $port = $2; } } $userfile = $_ ; $userfile = "/$userfile" unless ( $userfile =~ m%^/% ); return ( $protcol, $server, $port, $userfile ); } sub file_time{ my $file = shift; (stat($file))[9]; } ### ## 様々な形式の時刻表記から、timeを取り出す。 # sub lm2time{ $_ = shift ; my $time = 0; my ($y,$m,$d, $H,$M,$S); if (m%\w+, (\d+) (\w+) (\d+) (\d\d):(\d\d):(\d\d) (\w+)%io) { # rfc1123 "Sun, 06 Nov 1994 08:49:37 GMT" $y = $3 ; $y -= 1900 if ( $y > 1900); $y += 100 if ( $y <70 ); $time = &timeglobal( $7, $6, $5, $4, $1,&month2num($2),$y); } elsif (m%\w+, (\d+)-(\w+)-(\d+) (\d\d):(\d\d):(\d\d) (\w+)%io) { # rfc1036 "Sunday, 06-Nov-94 08:49:37 GMT" $y = $3 ; $y -= 1900 if ( $y > 1900); $y += 100 if ( $y <70 ); $time = &timeglobal( $7, $6, $5, $4, $1,&month2num($2),$y); } elsif (m%\w+ (\w+) (\d+) (\d\d):(\d\d):(\d\d) (\d+)%io) { # Sun Nov 6 08:49:37 1994 (ANSI C's asctime() format) $y = $6 ; $y -= 1900 if ( $y > 1900); $y += 100 if ( $y <70 ); $time = &timeglobal( "GMT", $5, $4, $3, $2,&month2num($1),$y); } elsif (m%HINA_OK (\d\d\d\d)/(\d+)/(\d+) (\d+):(\d+)%io){ # for 朝日奈アンテナ hina.txt #