#!/usr/bin/perl
# Print patent from IBM server
# Usage: prpat [ --scale ]
# scale factor defaults to 0.5
#
# Example URLs:
# http://www.delphion.com/gifcache/US05325479__.tif.1.s0.35.r0.gif
# http://www.delphion.com/fcgi-bin/any2html?FILENAME=%2Fcache%2F79%2F54%2FUS05325479__.tif&PAGE=13&USER_HTML=%253CA%2BHREF%253D%2Forder%253Fpn%3Dus05325479__%253EOrderPatent%253C%2FA%253E&SCALE=0.35
# http://www.delphion.com/fcgi-bin/any2html?FILENAME=%2Fcache%2F79%2F54%2FUS05325479__.tif&PAGE=1&USER_HTML=%253CA%2BHREF%253D%2Forder%253Fpn%3Dus05325479__%253EOrderPatent%253C%2FA%253E&SCALE=1.00
# http://www.delphion.com/cgi-bin/viewpat.cmd/US05325479__

# Directory in which downloaded pages are stored (and later reused).
# Deliberately a package global: other parts of the script refer to it.
$tmpdir = '/tmp';

# $status = get_url($url, $fn)
#
# Download $url into the file $fn by running "lynx -source".
#
# If $fn already exists with non-zero size it is treated as a cached copy
# and nothing is fetched.
#
# Returns 0 when the cached copy is used; otherwise returns whatever
# system() returns for the lynx command (0 on success).
sub get_url {
    my ($url, $fn) = @_;

    # In cache, don't retrieve.
    return 0 if -s $fn;

    pdb("getting $url\n");

    # The command goes through the shell because of the redirection, so
    # BOTH interpolated values must be quoted: URLs contain '&' and '?',
    # and an unquoted file name with a space or metacharacter would send
    # the output to the wrong place.
    my $qurl = shell_quote($url);
    my $qfn  = shell_quote($fn);
    return system "lynx -source $qurl > $qfn";
}

# $quoted_string = shell_quote($raw_string)
#
# Return $raw_string escaped so that a Bourne-style shell reads it back as
# one literal word.  The empty string becomes "" (two double quotes) so
# that it still counts as a word.
sub shell_quote {
    my ($raw) = @_;

    return '""' if $raw eq '';

    # Backslash-escape every non-word character.  A newline is the one
    # exception: backslash-newline is a line continuation that the shell
    # removes, so a newline is emitted inside single quotes instead.
    $raw =~ s/(\W)/$1 eq "\n" ? "'\n'" : "\\$1"/ge;
    return $raw;
}

# pdb(@list)
#
# Print the given list on the currently selected output handle; used for
# progress messages such as "getting <url>".
sub pdb {
    print @_;
}

# NOTE(review): the definition of prpage below is cut off mid-statement in
# the reviewed copy of this file, and parts of it look garbled (an empty
# "while ()" and an unterminated pattern).  It is preserved verbatim, with
# line breaks added only, and commented out so that the file parses.
# Restore it from the original script before re-enabling it.
#
# sub prpage {
#   my ($patnum, $page, $url) = @_;
#   my $imgurl;
#   $url =~ s/SCALE=[\d\.]+/SCALE=$scale/;
#   $url =~ s/PAGE=\d+/PAGE=$page/;
#   my $fn = "$tmpdir/pat-$patnum-page$page.html";
#   get_url ($url, $fn);
#   open PAGE, $fn;
#   while () { if (!$imgurl && /\) { if (!$npages && /ALT=\"1\/(\d+) /) { $npages = $1; pdb ("$npages pages\n"); } elsif (!$baseurl && /\