*** ../ljsm.pl Mon May 8 19:43:31 2006 --- ./ljsm.pl Tue Feb 7 02:04:46 2006 *************** *** 94,100 **** use File::Path; use File::Basename; use File::Find; - use HTML::Form; use Compress::Zlib; use Digest::MD5 qw(md5_hex); --- 94,99 ---- *************** *** 103,109 **** use strict; my ($ua, $req, $res, $login, @posts, %images, $user, %users, %stat, %memories, %posts, $umask); ! our ($opt_r, $opt_m, $opt_a, $opt_c, $opt_O, $opt_i, $opt_I, $opt_u, $opt_U, $opt_x, $opt_X, $opt_t, $opt_p, $opt_d); # open log file (delete it if there were no errors) $umask = umask 0077; --- 102,108 ---- use strict; my ($ua, $req, $res, $login, @posts, %images, $user, %users, %stat, %memories, %posts, $umask); ! our ($opt_r, $opt_m, $opt_a, $opt_c, $opt_O, $opt_i, $opt_I, $opt_u, $opt_U, $opt_x, $opt_t, $opt_p, $opt_d); # open log file (delete it if there were no errors) $umask = umask 0077; *************** *** 113,119 **** print LF scalar localtime() . "\n"; # steal options from @ARGV before we go for users ! getopts('rmacxtXOIUu:p:d:i:'); $opt_i = SAVE_PICS if (!$opt_i); usage() && exit unless (@ARGV); --- 112,118 ---- print LF scalar localtime() . "\n"; # steal options from @ARGV before we go for users ! getopts('rmacxtOIUu:p:d:i:'); $opt_i = SAVE_PICS if (!$opt_i); usage() && exit unless (@ARGV); *************** *** 153,172 **** # get cookies exit 1 unless (!(LOGIN || $opt_u) || ($login = lj_login())); - - - # get xml files and exit if -X option is set - if ($opt_X) { - if (!$login) { - logmsg("only registered user can export her posts as XML files\n"); - logmsg("please specify -u login:password at the command line\n"); - exit 128; - } - ($user) = split(':', $opt_u); - get_xml($user); - exit 0; - } - # get posts and memories foreach $user (@ARGV) { # for each user --- 152,157 ---- *************** *** 376,467 **** } } } - - - sub get_xml { - my ($user) = @_; - my ($path, $content, $tmp, $exportform, $month, $emonth, $year, $fname); - - logmsg("exporting $user\'s posts in XML format...\n",2); - $path = LOCAL_DIR . $user . '/export'; - mkpath($path, DEBUG_LEVEL, 0755) if (!-d $path); - my ($start_year, $start_month, $end_year, $end_month) = get_date_range($user, 1); - - # get export form - if ($content = get_page(BASE_URL . EXPORT_SCRIPT)) { - ($tmp, $exportform) = parse HTML::Form($content, BASE_URL); - if (defined $exportform) { - #$exportform->dump(); - $exportform->value('format', 'xml'); - $exportform->value('encid', 3); # 6 for KOI-8 - # uncomment next line to skip translation between encodings - # $exportform->value('notranslation', 1); - - $year = $end_year; - YEAR: - while ($year >= $start_year) { - $emonth = ($year == $start_year)? $start_month : 1; - for ($month = 12; $month >= $emonth; $month--) { - next if (($year == $end_year) && ($month > $end_month)); - $fname = $path . sprintf("/%4d_%02d.xml", $year, $month); - - if (-f $fname) { - if ($opt_O) { - logmsg("!! overwriting $fname\n",2); - } elsif ($opt_r) { - logmsg("-r: skipping $fname\n", 2); - next; - } else { - last YEAR; - } - } - - logmsg("<< $year/$month\n",2); - $exportform->value('year', $year); - $exportform->value('month', $month); - - #submit export form - $req = $exportform->click(); - $req->header('Accept-Encoding' => 'gzip;q=1.0, *;q=0'); - - foreach (0 .. MAX_TRIES) { - $res = $ua->request($req); - logmsg("retrying $year/$month...\n") if $_; - if ($res->is_success) { - $content = ($res->content_encoding && ($res->content_encoding =~ /gzip/))? - Compress::Zlib::memGunzip($res->content) : $res->content; - # save contents to a file - logmsg(">> $fname\n"); - open (DF, ">$fname") or die "error opening $fname for writing: $!\n"; - print DF $content; - close DF; - last; - - } else { - # TODO add logging of failed xml exort - my $err = $res->error_as_HTML; - $err =~ s/^[^\d].*$//mg; - $err =~ s/[\n\r]+//g; - logmsg("\n$err\n",0); - if ($_ == MAX_TRIES) { - logmsg ("failed to get XML file for $year/$month\n"); - print LF "Failed: xml for $year/$month\n"; - $stat{'pages_err'}++; - return 0 unless $opt_I; - } - sleep 2; - } - } - } - $year--; - } - - } else { - logmsg("error getting export form " . BASE_URL . EXPORT_SCRIPT); - return 0; - } - } - } --- 361,366 ----