#!/usr/local/bin/perl
#
# Aggregates "last modified" information gathered from the files listed in
# remote.cfg into one DI document and prints it on STDOUT.
#
# Flow: load the local configuration, load the remote definitions, merge the
# cached DI file, change into the data directory, then for every configured
# file call get_remote() followed by remote_data().  Finally the collected
# entries are sorted and printed after a small User-Agent/Date header.
#
use strict;
use lib './';
#require 'jcode.pl';
use NKF;
use Time::Local;
use DI::blocks;
use DI::Di;
use HTTP::Date;
use IO::Socket;
use IO::Select;
use File::Copy 'cp';

use vars qw(
    $DI_DATA $CACHED_DI_FILE
    $REMOTE_CFG $REMOTE_CFG_FILE
    %BYMYSELF
    $CFG_FILE $URL
    $METHOD $M_NAME $R_FILE $G_URL $R_URL $LMD
    $VERSION $USER_AGENT $HTTP_PROXY
    $DATA_DIR
    $THIS_TIME $THIS_YEAR
    $EXPIRE_MARGIN $ONE_HOUR $ONE_DAY
    $CONNECT_TIMEOUT $READ_TIMEOUT
);

$REMOTE_CFG_FILE = 'remote.cfg';
$CFG_FILE = 'popopo.cfg';
$CACHED_DI_FILE = 'po.di';
$URL = 'http://popopo.hauN.org/';
$DATA_DIR ='./data';
$VERSION = "2.0b7 (19991005; DI::Di/$DI::Di::VERSION; DI::blocks/$DI::blocks::VERSION)";
$USER_AGENT = 'MICAN';
$USER_AGENT .= "/$VERSION";
#
# &jcode::init();
#

# Keys used inside each per-file hash stored in $REMOTE_CFG.
$METHOD = 'METHOD';
$M_NAME = 'METHOD_NAME';
$R_FILE = 'REMOTE_FILE';
$G_URL = 'GET_URL';
$R_URL = 'REFER_URL';
$LMD = 'L_M_D';

# Time constants, in seconds.
$ONE_HOUR = 60 * 60;
$ONE_DAY = $ONE_HOUR * 24;
$EXPIRE_MARGIN = $ONE_HOUR * 2;
$THIS_TIME = time();
$THIS_YEAR = (localtime($THIS_TIME))[5];

# Network timeouts (presumably seconds; they are not used in this part of
# the file -- confirm against get_remote).
$CONNECT_TIMEOUT = 1;
$READ_TIMEOUT = 20;

$DI_DATA = DI::Di->new();

## read config
read_config_file();
## read remote CFG
remote_cfg();
## read cache
read_cache_di();
## change to data directory
# Everything below opens files relative to the data directory, so a failed
# chdir is reported instead of being silently ignored.
chdir($DATA_DIR)
    or warn "cannot chdir to $DATA_DIR: $!\n";

foreach my $file ( keys %$REMOTE_CFG ) {
    # print STDERR "FILE: $file\n";
    # next if &get_remote($file);
    get_remote($file);
    remote_data($file);
}

$DI_DATA->sort();
print "User-Agent: $USER_AGENT\n";
print "Date: ",HTTP::Date::time2str($THIS_TIME),"\n\n";
print $DI_DATA->as_di_string;
exit;

###
## Merge the cached DI file ($CACHED_DI_FILE) into $DI_DATA.
##
## The cache is read one blank-line-separated record at a time; every line
## of a record is split into "field: value" and the field name is
## lower-cased.  Records without a "url" field are ignored.  A record that
## is expired, or whose last-modified value is 0, is not merged; if its URL
## is a key of %BYMYSELF, the L_M_D value of the corresponding remote file
## is set to 0 instead (presumably so that get_remote fetches it again --
## confirm).  Takes no arguments; the return value is not meaningful.
#
sub read_cache_di {
    # A missing or unreadable cache just means there is nothing to merge.
    open(my $cache_fh, '<', $CACHED_DI_FILE) or return;

    # Read whole records; local() restores $/ however this sub is left.
    local $/ = "\n\n";

    while (my $record = <$cache_fh>) {
        my %rec = ();
        foreach my $line (split /\n/, $record) {
            my ($field, $value) = split(/:\s*/, $line, 2);
            $rec{ lc $field } = $value;
        }
        next unless exists $rec{'url'};

        my $di = DI::blocks->new(%rec);

        # Skip records whose modification time is later than the time at
        # which the modification was detected.
        next if ( $di->last_modified > $di->last_modified_detected );

        if ( $di->expired || $di->last_modified == 0 ) {
            if ( exists $BYMYSELF{ $rec{'url'} } ) {
                $REMOTE_CFG->{ $BYMYSELF{ $rec{'url'} } }->{$LMD} = 0;
            }
        }
        else {
            $DI_DATA->update($di);
        }
    }
    close($cache_fh);
    return;
}

###
## Load the remote definitions from $REMOTE_CFG_FILE.
##
## Each non-blank, non-comment line has the form
##     method::name::filename::url::refer
## and is stored in $REMOTE_CFG under its filename.  The L_M_D slot is
## filled from file_time() when the file already exists below $DATA_DIR and
## is 0 otherwise.  For every entry whose method is not 'remote',
## %BYMYSELF maps the refer URL back to the filename.
#
sub remote_cfg {
    my $cfg_fh;
    unless ( open($cfg_fh, '<', $REMOTE_CFG_FILE) ) {
        warn "cannot open $REMOTE_CFG_FILE: $!\n";
        return;
    }

    while (my $line = <$cfg_fh>) {
        next if $line =~ /^\s*$/;
        next if $line =~ /^#/;

        # $line = &jcode::euc($line);
        $line = nkf("-e", $line);    # convert to EUC
        chomp $line;

        my ($method, $name, $filename, $url, $refer) = split /::/, $line;

        # A line without a filename field cannot be keyed; skip it rather
        # than registering an entry under an empty name.
        next unless defined $filename && $filename ne '';

        my $entry = ( $REMOTE_CFG->{$filename} ||= {} );
        $entry->{$METHOD} = $method;
        $entry->{$M_NAME} = $name;
        $entry->{$G_URL}  = $url;
        $entry->{$R_URL}  = $refer;
        $entry->{$LMD}    = ( -e "$DATA_DIR/$filename" )
                          ? file_time("$DATA_DIR/$filename")
                          : 0;

        if ( $method ne 'remote' ) {
            $BYMYSELF{$refer} = $filename;
        }
    }
    close($cfg_fh);
    return;
}

###
## Extract update information from one fetched file and feed it to
## $DI_DATA.  According to the original notes this handles hina.txt,
## hina.di, dirp and HTTP-header style output.
##
## Argument: the file name (a key of $REMOTE_CFG), opened relative to the
## current directory.  Lines are classified as:
##   * "Field: value" headers, collected into a record;
##   * lines containing HINA_OK;
##   * "Neko" (cat) format lines;
##   * lines containing a time (digits:digits) and an href;
##   * empty lines, which end the current header record.
##
## NOTE(review): a header record that is not followed by an empty line
## before end of file is never submitted; left as-is.
#
sub remote_data {
    my $file = shift;

    my $detected = 0;
    my %rec      = ();

    # Nothing to parse when the file is absent or unreadable.
    open(my $fh, '<', $file) or return;

    while (my $line = <$fh>) {
        # Convert to EUC.
        # $line = &jcode::euc($line);
        $line = nkf("-e", $line);
        # Strip the line terminator.
        chomp $line;

        if ( $line =~ /^[\w\-_]+:/ ) {
            my ($feature, $value) = split(/:\s*/, $line, 2);
            if ( $feature =~ /last-modified/i ) {
                # Only the first parsable Last-Modified value is kept.
                $detected ||= lm2time($value);
            }
            elsif ( $feature =~ /url$/i ) {
                $value =~ s!/%7E!/~!;
            }
            if ( DI::blocks->is_feature_defined($feature) ) {
                $rec{$feature} = $value;
            }
        }
        elsif ( $line =~ /HINA_OK/ ) {
            # The original comments showed a sample line here (a link text
            # followed by "09/11 16:20"); its markup is missing from this
            # copy of the source.
            # NOTE(review): the pattern below looks damaged in the same way
            # (stray "]"); it is kept unchanged -- confirm against the
            # original source.
            if ( $line =~ m%[^\(]]+href=\"([^\"]+)\"[^>]*>%i ) {
                my $url  = _strip_url_noise($1);
                # Extract the time from the same line.
                my $time = lm2time($line);
                set_info( $file, $url, $time, $detected );
                # $detected = 0;
            }
        }
        # "Neko" (cat) format
        elsif ( $line =~ /^(\d+)\s\d+\s(\d+)\s\d{3}\s\w+\s(http:\/\/\S+)/ ) {
            my ($seen, $time, $raw_url) = ($1, $2, $3);
            $detected = $seen;
            my $url = _strip_url_noise($raw_url);
            if ( $url ne "" && $time ne "" ) {
                set_info( $file, $url, $time, $detected );
                # $detected = 0;
            }
        }
        elsif ( $line =~ /\d+:\d+/ ) {
            if ( $line =~ m%href=\"([^\"]+)\"[^>]*>%i ) {
                my $url  = _strip_url_noise($1);
                # Extract the time from the same line.
                my $time = lm2time($line);
                if ( $url ne "" && $time ne "" ) {
                    set_info( $file, $url, $time, $detected );
                    $detected = 0;
                }
            }
        }
        elsif ( $line =~ /^$/ ) {
            # An empty line ends the header record collected so far.
            my $di = DI::blocks->new(%rec);
            %rec = ();

            # next if ( $di->authorized_url =~ /\Q$URL\E/i );
            # Reject entries dated more than a quarter of an hour ahead of
            # the current time.
            next if ( $di->last_modified > $THIS_TIME + $ONE_HOUR / 4 );

            $di->header(X_Mican_Authorized => $REMOTE_CFG->{$file}->{$M_NAME});
            $di->header(X_Mican_Authorized_Url => $REMOTE_CFG->{$file}->{$R_URL});
            unless ( defined $di->last_modified_detected ) {
                $di->last_modified_detected($detected);
            }
            if ( defined $di->expire ) {
                $di->expires($di->expire) unless ( defined $di->expires );
            }
            # if ( ( $di->url =~ /^http:/ ) && ($di->expired == 0 ) ){
            if ( $di->url =~ /^http:/ ) {
                $DI_DATA->update($di);
            }
        }
    }
    close($fh);
    return;
}

# Remove the junk that follows a URL: a trailing "?digits" or "&digits"
# (optionally followed by "#fragment") is dropped and the first "%7E" is
# turned back into "~".  Returns the cleaned URL.
sub _strip_url_noise {
    my ($url) = @_;
    $url =~ s/[\?&]\d+(\#\w+)?$//;
    $url =~ s/\%7E/~/o;
    return $url;
}

###
## Register one remote entry in $DI_DATA.
##
## Arguments: the file name (key of $REMOTE_CFG), the URL, its
## last-modified time and the time the modification was detected.
## Entries dated more than a quarter of an hour ahead of $THIS_TIME are
## dropped.  Both expire and expires are set to $THIS_TIME.
#
sub set_info {
    my ($file, $url, $time, $detected) = @_;

    my $di = DI::blocks->new( URL => $url );
    $di->last_modified($time);
    return if ( $di->last_modified > $THIS_TIME + $ONE_HOUR / 4 );

    # if ($detected){
    $di->last_modified_detected($detected);
    # }
    $di->header(X_Mican_Authorized => $REMOTE_CFG->{$file}->{$M_NAME});
    $di->header(X_Mican_Authorized_Url => $REMOTE_CFG->{$file}->{$R_URL});
    $di->header(Authorized_Url => $REMOTE_CFG->{$file}->{$R_URL});
    # $di->expire( $THIS_TIME + $EXPIRE_MARGIN );
    # $di->expires( $THIS_TIME + $EXPIRE_MARGIN );
    $di->expire( $THIS_TIME );
    $di->expires( $THIS_TIME );
    $DI_DATA->update($di);
    return;
}

###
## Extract a time value from a variety of textual date notations.
#
sub lm2time{
  $_ = shift ;
  my $time = 0;
  my ($y,$m,$d, $H,$M,$S);
  if (m%\w+, (\d+) (\w+) (\d+) (\d\d):(\d\d):(\d\d) (\w+)%io) {
    # rfc1123 "Sun, 06 Nov 1994 08:49:37 GMT"
    $y = $3 ;
    $y -= 1900 if ( $y >= 1900);
    $y += 100 if ( $y <70 );
    $time = &timeglobal( $7, $6, $5, $4, $1,&month2num($2),$y);
  } elsif (m%\w+, (\d+)-(\w+)-(\d+) (\d\d):(\d\d):(\d\d) (\w+)%io) {
    # rfc1036 "Sunday, 06-Nov-94 08:49:37 GMT"
    $y = $3 ;
    $y -= 1900 if ( $y >= 1900);
    $y += 100 if ( $y <70 );
$time = &timeglobal( $7, $6, $5, $4, $1,&month2num($2),$y); } elsif (m%\w+ (\w+) (\d+) (\d\d):(\d\d):(\d\d) (\d+)%io) { # Sun Nov 6 08:49:37 1994 (ANSI C's asctime() format) $y = $6 ; $y -= 1900 if ( $y >= 1900); $y += 100 if ( $y <70 ); $time = &timeglobal( "GMT", $5, $4, $3, $2,&month2num($1),$y); } elsif (m%HINA_OK (\d\d\d\d)/(\d+)/(\d+) (\d+):(\d+)%io){ # for 朝日奈アンテナ hina.txt #