libapse.lib,conf/03-apse.conf: fix scraping values and add new sites

Signed-off-by: Stefano Cotta Ramusino <stefano.cotta@openmamba.org>
This commit is contained in:
Stefano Cotta Ramusino 2012-07-04 03:28:22 +02:00
parent b856cd2959
commit 6f23a0da8e
2 changed files with 138 additions and 81 deletions

View File

@ -1,43 +1,71 @@
## APSE (Autospec Package Search Engine) arrays ------------------------------- ## APSE (Autospec Package Search Engine) arrays -------------------------------
apse_site_name[0]="http://fileforum.betanews.com" apse_enabled[0]=1
apse_uri[0]="http://fileforum.betanews.com/search?search_advanced=Search+FileForum&os%5B%5D=12&alphabeta=R&title=" apse_site_name[0]="source0"
apse_put_fields[0]=""
apse_error_msg[0]="No results found\. Please try another search\."
apse_grep[0]=""
apse_sed[0]="s,.*<span>1.</span> <a href=[^ ]*>\${search_name}[^<]*[ ]\([^ <]*\)</a></h1>.*,\1,pi"
apse_site_name[1]="http://linux.softpedia.com" apse_enabled[1]=1
apse_uri[1]="http://linux.softpedia.com/progSearch" apse_site_name[1]="http://sourceforge.net"
apse_put_fields[1]="src="
apse_error_msg[1]="No results found"
apse_grep[1]="<b[^<]*\${search_name}[^a-zA-Z]"
apse_sed[1]="s,.*<b[^<]*\${search_name}[^<]*[ ]\([^ <]*\)</b>.*,\1,pi"
apse_site_name[2]="http://www.gnomefiles.org" apse_enabled[2]=1
apse_uri[2]="http://www.gnomefiles.org/app.php/" apse_site_name[2]="http://fileforum.betanews.com"
apse_uri[2]="http://fileforum.betanews.com/search?search_advanced=Search+Fileforum&os%5B%5D=12&alphabeta=R&title="
apse_put_fields[2]="" apse_put_fields[2]=""
apse_error_msg[2]="We are currently making some improvements" apse_error_msg[2]="No releases "
apse_grep[2]="" apse_grep[2]=""
apse_sed[2]="s,.*<B>version \([^ <]*\)</b>.*,\1,pi" apse_sed[2]="s,.*<p><a href=\"http://fileforum.betanews.com/detail/[^ ]*>\${search_name}[^<]*[ ]\([^ <]*\)</a></p>.*,\1,pi"
apse_site_name[3]="http://freshmeat.net" apse_enabled[3]=1
apse_uri[3]="http://freshmeat.net/projects/" apse_site_name[3]="http://linux.softpedia.com"
apse_put_fields[3]="" apse_uri[3]="http://linux.softpedia.com/progFinder"
apse_error_msg[3]="We encountered an error" apse_put_fields[3]="search_term="
apse_grep[3]="/releases/" apse_error_msg[3]="No results "
apse_sed[3]="s,.*<a href=\"\/releases\/[0-9]*\/\">\([^ <]*\)</a>.*,\1,pi" apse_grep[3]="<h2><a[^<]*\${search_name}[^a-zA-Z]"
apse_sed[3]="s,.*<h2><a[^<]*\${search_name}[^<]*[ ]\([^ <]*\)</a>.*,\1,pi"
apse_site_name[4]="http://sourcewell.berlios.de" apse_enabled[4]=1
apse_uri[4]="http://sourcewell.berlios.de/appsearch.php?by=Importance&search=" apse_site_name[4]="http://gnomefiles.org"
apse_put_fields[4]="" apse_uri[4]="http://gnomefiles.org/content/search.php"
apse_error_msg[4]="No Application found" apse_put_fields[4]="search=Search&text="
apse_grep[4]="appbyid.*\${search_name}[^a-zA-Z].*Stable" apse_error_msg[4]="No entries\. "
apse_sed[4]="s,.*<a href=\"appbyid.*\">\${search_name} \([^ <]*\)</a>.*,\1,pi" apse_grep[4]=""
apse_sed[4]="s,.*\${search_name}</span></a></h1>&nbsp;\([^&]*\)&nbsp;<.*,\1,pi"
apse_site_name[5]="http://distrowatch.com" apse_enabled[5]=1
apse_uri[5]="http://distrowatch.com/packages.php?" apse_site_name[5]="http://freecode.com"
apse_uri[5]="http://freecode.com/projects/"
apse_put_fields[5]="" apse_put_fields[5]=""
apse_error_msg[5]="is currently being redesigned" apse_error_msg[5]="404 Not Found"
apse_grep[5]="td.*/\${search_name}[-_][0-9]" apse_grep[5]="/releases/"
apse_sed[5]="s,.*td><[^>]*>\([^ <]*\)<.*,\1,pi" apse_sed[5]="s,.*<a href=\"/projects/\${search_name}/releases/[0-9]*\">\([^ <]*\)</a>.*,\1,pi"
# ---------------------------------------------------------------------------
# Search-site entries #6..#9.
#
# Each site is described by parallel arrays sharing the same index:
#   apse_enabled     the scraping loop skips the entry when this is "0"
#   apse_site_name   label shown in the "looking at" / "skipping" messages;
#                    the loop iterates over the indices of this array
#   apse_uri         address of the site's search/project page
#                    (NOTE(review): presumably the search name is appended
#                    when apse_put_fields is empty -- confirm in libapse.lib)
#   apse_put_fields  when non-empty, sent as request data via `curl -d` with
#                    the search name appended; when empty a plain request is
#                    made instead
#   apse_error_msg   interpolated into a `s,.*MSG.*,error,pi` sed expression:
#                    if any line of the response matches, the site is treated
#                    as having no result. It is a regex, and `,` is the sed
#                    delimiter there, so it must not contain a bare comma.
#   apse_grep        optional; when set, only the first line matching it
#                    (grep -i -m1) is passed on to apse_sed, otherwise
#                    apse_sed runs over the whole response
#   apse_sed         `sed -n` script whose printed output is taken as the
#                    version string
#
# `\${search_name}` is stored as the literal text ${search_name}; it is
# replaced with the actual name right before apse_grep/apse_sed are run.
# The trailing `pi` flags on the sed substitutions mean: print the line when
# the substitution succeeds, and match case-insensitively.
# ---------------------------------------------------------------------------

# #6 sourcewell.berlios.de -- query-string search (no request data).
apse_enabled[6]=1
apse_site_name[6]="http://sourcewell.berlios.de"
apse_uri[6]="http://sourcewell.berlios.de/appsearch.php?by=Importance&search="
apse_put_fields[6]=""
apse_error_msg[6]="No Application found"
# Pick the first line holding an "appbyid" link for the name (followed by a
# non-letter, so longer names do not match) that also mentions "Stable".
apse_grep[6]="appbyid.*\${search_name}[^a-zA-Z].*Stable"
# Capture the token that follows "<name> " inside that link's text.
apse_sed[6]="s,.*<a href=\"appbyid.*\">\${search_name} \([^ <]*\)</a>.*,\1,pi"

# #7 distrowatch.com -- package table lookup (no request data).
apse_enabled[7]=1
apse_site_name[7]="http://distrowatch.com"
apse_uri[7]="http://distrowatch.com/packages.php?"
apse_put_fields[7]=""
apse_error_msg[7]="is currently being redesigned"
# Pick the first table-cell line containing "/<name>-<digit>" or
# "/<name>_<digit>".
apse_grep[7]="td.*/\${search_name}[-_][0-9]"
# Capture the text of a tag that directly follows "td>"; the leading `.*` is
# greedy, so the last such occurrence on the line is the one captured.
apse_sed[7]="s,.*td><[^>]*>\([^ <]*\)<.*,\1,pi"

# #8 directory.fsf.org -- wiki page lookup (no request data, no grep filter).
apse_enabled[8]=1
apse_site_name[8]="http://directory.fsf.org"
apse_uri[8]="http://directory.fsf.org/wiki/"
apse_put_fields[8]=""
apse_error_msg[8]="is currently no text"
apse_grep[8]=""
# Print the remainder of every line that starts with "version "; since no
# grep filter narrows the input, more than one line may be printed.
apse_sed[8]="s,^version \(.*\),\1,pi"

# #9 launchpad.net -- project page lookup (no request data, no grep filter).
apse_enabled[9]=1
apse_site_name[9]="http://launchpad.net"
apse_uri[9]="http://launchpad.net/"
apse_put_fields[9]=""
apse_error_msg[9]="page does not exist"
apse_grep[9]=""
# Print whatever follows "Latest version is " up to the end of the line.
apse_sed[9]="s,.*Latest version is \(.*\),\1,pi"

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
# libapse.lib -- Autospec Package Search Engine library # libapse.lib -- Autospec Package Search Engine library
# Copyright (C) 2007 Stefano Cotta Ramusino <stefano.cotta@openmamba.org> # Copyright (C) 2007,2012 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>
# Copyright (C) 2008-2011 Silvan Calarco <silvan.calarco@mambasoft.it> # Copyright (C) 2008-2011 Silvan Calarco <silvan.calarco@mambasoft.it>
[ -z "$BASH" ] || [ ${BASH_VERSION:0:1} -lt 2 ] && [ -z "$BASH" ] || [ ${BASH_VERSION:0:1} -lt 2 ] &&
@ -47,7 +47,7 @@ function apse.cmpversion() {
version1 version2 minfield maxfield field1 field2 \ version1 version2 minfield maxfield field1 field2 \
retval code1 code2 subarra1 position retval code1 code2 subarra1 position
let "position = 7" let "position = 8"
vcs=(\ vcs=(\
"cvs" "svn" "git" "rcs" "bzr" "mtn" "ae") "cvs" "svn" "git" "rcs" "bzr" "mtn" "ae")
@ -62,8 +62,6 @@ function apse.cmpversion() {
precedence=(\ precedence=(\
"prealpha[*]" "alpha" "beta[*]" "rc[*]" "delta" "omega" "stable[*]") "prealpha[*]" "alpha" "beta[*]" "rc[*]" "delta" "omega" "stable[*]")
# TODO: if there isn't in this list compare alphabetically
# TODO: if in both there is vcs compare, if only in one of them watch changelog or/and data # TODO: if in both there is vcs compare, if only in one of them watch changelog or/and data
version1=($(echo $1 | tr '[:punct:]' ' ' | \ version1=($(echo $1 | tr '[:punct:]' ' ' | \
@ -160,6 +158,14 @@ $FUNCNAME: \`$code1' has position ${version1[$i]} in precedence"
elif [[ ${version2[$i]} -lt ${version1[$i]} ]]; then elif [[ ${version2[$i]} -lt ${version1[$i]} ]]; then
retval=1 retval=1
break break
elif [ "${version2[$i]}" = "8" ]; then
if [ "$code2" \> "$code1" ]; then
retval=2
elif [ "$code2" \< "$code1" ]; then
retval=1
else
retval=0
fi
else else
if [ $i -eq $(($minfield-1)) ]; then if [ $i -eq $(($minfield-1)) ]; then
for j in $(seq $i 1 $[$maxfield-1]); do for j in $(seq $i 1 $[$maxfield-1]); do
@ -206,7 +212,8 @@ function apse.scrapeversion() {
function usage() { function usage() {
echo "\ echo "\
$me, "$"version"" @version@"" $me, "$"version"" @version@""
Copyright (C) 2007 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>"" Copyright (C) 2007,2012 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>""
Copyright (C) 2008-2011 Silvan Calarco <silvan.calarco@mambasoft.it>""
"$"Find for updates scanning the web."" "$"Find for updates scanning the web.""
@ -292,25 +299,26 @@ Copyright (C) 2007 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>""
[ "$src0pckname" = "$specname" ] || [ "$src0pckname" = "$specname" ] ||
{ src_name="$src0pckname" { src_name="$src0pckname"
notify.debug "src_name = \"$src_name\""; } notify.debug "$FUNCNAME: src_name = \"$src_name\""; }
local curr_curl_url pck_file new_version last_version local curr_curl_url pck_file new_version last_version
local curr_curl_proxy_opts="\ local curr_curl_proxy_opts="\
${proxy:+ --proxy $proxy}${proxy_user:+ --proxy-user $proxy_user}" ${proxy:+ --proxy $proxy}${proxy_user:+ --proxy-user $proxy_user}"
# SOURCE0 url based search (only if 'pckurl' is a valid url) # SOURCE0 url based search (only if 'pckurl' is a valid url)
[ "${apse_enabled[0]}" = "0" ] && notify.debug "$FUNCNAME: skipping ${apse_site_name[0]}" || \
if [[ "$pckurl" =~ .*://.* ]]; then if [[ "$pckurl" =~ .*://.* ]]; then
local pckurldir="`dirname $pckurl`/" local pckurldir="`dirname $pckurl`/"
notify.debug "pckurldir = $pckurldir" notify.debug "$FUNCNAME: pckurldir = $pckurldir"
local pcknameverbatim=`basename $pckurlverbatim` local pcknameverbatim=`basename $pckurlverbatim`
notify.debug "pcknameverbatim = $pcknameverbatim" notify.debug "$FUNCNAME: pcknameverbatim = $pcknameverbatim"
curr_curl_url="$pckurldir" curr_curl_url="$pckurldir"
notify.note $"looking at"" <${NOTE}$curr_curl_url${NORM}> (\`${NOTE}source0${NORM}')..." notify.note $"looking at"" ${NOTE}(#0)${NORM} <${NOTE}$curr_curl_url${NORM}> (\`${NOTE}source0${NORM}')..."
notify.debug "curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\"" notify.debug "running: curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curl_opts_netlink $curr_curl_proxy_opts -s -L "$curr_curl_url" > $fcurlout curl $curl_opts_netlink $curr_curl_proxy_opts -s -L "$curr_curl_url" > $fcurlout
let "retval = $?" let "retval = $?"
@ -349,7 +357,7 @@ ${proxy:+ --proxy $proxy}${proxy_user:+ --proxy-user $proxy_user}"
s,\.tbz2$,\\\.,") s,\.tbz2$,\\\.,")
local retval=$? local retval=$?
notify.debug "pcknameregexp = $pcknameregexp" notify.debug "$FUNCNAME: pcknameregexp = $pcknameregexp"
if [ $retval -eq 0 ]; then if [ $retval -eq 0 ]; then
if [ "${pcknameregexp/(/}" = "${pcknameregexp}" ]; then if [ "${pcknameregexp/(/}" = "${pcknameregexp}" ]; then
@ -392,43 +400,46 @@ sort -t. -n -r -k1 -k2 -k3 -k4 -k5 -k6 -k7 -k8 -k9 -k10 ))
if [ "$new_version" ]; then if [ "$new_version" ]; then
notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'" notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'"
last_version="$new_version" last_version="$new_version"
else
notify.debug "$FUNCNAME: no new version found in current site"
fi fi
fi fi
# sourceforge.net, sf.net # sourceforge.net, sf.net
if [ "${apse_enabled[1]}" = "1" ]; then
local prj_name local prj_name
notify.note $"looking at"" <${NOTE}http://sourceforge.net${NORM}>..." notify.note $"looking at"" ${NOTE}(#1)${NORM} <${NOTE}${apse_site_name[1]}${NORM}>..."
prj_name=$specname prj_name=$specname
[ "$prj_name" ] || notify.error $"\ [ "$prj_name" ] || notify.error $"\
(bug)"" -- $FUNCNAME: "$"assertion failed:"" \"\$prj_name\" != \"\"" (bug)"" -- $FUNCNAME: "$"assertion failed:"" \"\$prj_name\" != \"\""
curr_curl_url="http://sourceforge.net/projects/$prj_name/files/latest" curr_curl_url="http://sourceforge.net/projects/$prj_name/files/latest"
notify.debug "curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\"" notify.debug "running: curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curr_curl_proxy_opts -I -A Linux -s -L "$curr_curl_url" > $fcurlout curl $curr_curl_proxy_opts -I -A Linux -s -L "$curr_curl_url" > $fcurlout
let "retval = $?" let "retval = $?"
case "$retval" in case "$retval" in
0) ;; 0) ;;
6) notify.warning $"couldn't resolve host" ;; 6) notify.warning $"couldn't resolve host" ;;
7) notify.warning $"failed to connect to host" ;; 7) notify.warning $"failed to connect to host" ;;
*) notify.warning $"curl error (exit code: $retval)" ;; *) notify.warning $"curl error (exit code: $retval)" ;;
esac esac
[ "$retval" = "0" ] && [ "$retval" = "0" ] &&
if [ "$(sed -n "s,.*Invalid Project.*,error,pi" $fcurlout)" != \ if [ "$(sed -n "s,.*Invalid Project.*,error,pi" $fcurlout)" != \
"error" ]; then "error" ]; then
[ "$src_name" ] || src_name="$specname" [ "$src_name" ] || src_name="$specname"
pck_file=`grep -i -m1 "Location:" $fcurlout | \ pck_file=`grep -i -m1 "Location:" $fcurlout | \
grep -i "$src_name" | \ grep -i "$src_name" | \
sed -n "s,Location: \(.*\)/download,\1,pi"` sed -n "s,Location: \(.*\)/download,\1,pi"`
notify.debug "pck_file = \"$pck_file\"" notify.debug "$FUNCNAME: pck_file = \"$pck_file\""
[ "$src_name" ] && unset src_name [ "$src_name" ] && unset src_name
new_version=`echo $pck_file | sed "s|.*/\([0-9.]*\)/.*|\1|"` new_version=`echo $pck_file | sed "s|.*/\([0-9.]*\)/.*|\1|"`
[ "$new_version" ] || \ [ "$new_version" ] || \
new_version=`echo $pck_file | sed -n "\ new_version=`echo $pck_file | sed -n "\
/[0-9]/!q # return nothing if no number is found in the package name /[0-9]/!q # return nothing if no number is found in the package name
s,.*/,, # remove directory name, if any s,.*/,, # remove directory name, if any
s/\.[^0-9].*// # remove trailing stuff (.tar.gz, ...) s/\.[^0-9].*// # remove trailing stuff (.tar.gz, ...)
@ -438,11 +449,16 @@ s|.*/\([0-9.]*\)/.*|\1|
/[^-\.][0-9]\./{s/.*[^-\.]\([0-9]\..*\)/\1/p;q} # <pck_name><pck_ver> /[^-\.][0-9]\./{s/.*[^-\.]\([0-9]\..*\)/\1/p;q} # <pck_name><pck_ver>
# <pck_name> (no version, but <pck_name> can end with numbers) # <pck_name> (no version, but <pck_name> can end with numbers)
/^[^0-9]*[0-9]*$/q"` /^[^0-9]*[0-9]*$/q"`
fi fi
if [ "$new_version" ]; then if [ "$new_version" ]; then
notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'" notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'"
last_version="$new_version" last_version="$new_version"
else
notify.debug "$FUNCNAME: no new version found in current site"
fi
else
notify.debug "$FUNCNAME: skipping ${apse_site_name[1]}"
fi fi
# look at the other supported web sites... # look at the other supported web sites...
@ -450,8 +466,11 @@ s|.*/\([0-9.]*\)/.*|\1|
# skip perl packages to prevent name confusion; these are checked above via Source0 # skip perl packages to prevent name confusion; these are checked above via Source0
if [ ! "$(echo $pckurl | grep "cpan.org")" ]; then if [ ! "$(echo $pckurl | grep "cpan.org")" ]; then
for i in ${!apse_site_name[@]}; do for i in ${!apse_site_name[@]}; do
[ $i -le 1 ] && continue
[ "${apse_enabled[$i]}" = "0" ] && { notify.debug \
"$FUNCNAME: skipping ${apse_site_name[$i]}"; continue; }
notify.note \ notify.note \
$"looking at"" <${NOTE}${apse_site_name[$i]}${NORM}>..." $"looking at"" ${NOTE}(#$i)${NORM} <${NOTE}${apse_site_name[$i]}${NORM}>..."
for search_name in $src_name $specname; do for search_name in $src_name $specname; do
[ "${apse_put_fields[$i]}" ] && [ "${apse_put_fields[$i]}" ] &&
@ -460,12 +479,12 @@ $"looking at"" <${NOTE}${apse_site_name[$i]}${NORM}>..."
if [ "${apse_put_fields[$i]}" ]; then if [ "${apse_put_fields[$i]}" ]; then
notify.debug "\ notify.debug "\
curl \$curr_curl_proxy_opts -s -L -F ${apse_put_fields[$i]}${search_name} \"$curr_curl_url\"" running: curl \$curr_curl_proxy_opts -s -L -d \"${apse_put_fields[$i]}${search_name}\" \"$curr_curl_url\""
curl $curr_curl_proxy_opts -s -L -F \ curl $curr_curl_proxy_opts -s -L -d \
${apse_put_fields[$i]}${search_name} "$curr_curl_url" > $fcurlout "${apse_put_fields[$i]}${search_name}" "$curr_curl_url" > $fcurlout
else else
notify.debug "\ notify.debug "\
curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\"" running: curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curr_curl_proxy_opts -s -L "$curr_curl_url" > $fcurlout curl $curr_curl_proxy_opts -s -L "$curr_curl_url" > $fcurlout
fi fi
@ -481,11 +500,17 @@ curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
if [ "$(cat $fcurlout | sed -n "\ if [ "$(cat $fcurlout | sed -n "\
s,.*${apse_error_msg[$i]}.*,error,pi")" != "error" ]; then s,.*${apse_error_msg[$i]}.*,error,pi")" != "error" ]; then
current_grep="$(echo "${apse_grep[$i]}" | sed -e "s,\${search_name},${search_name},g")"
current_sed="$(echo "${apse_sed[$i]}" | sed -e "s,\${search_name},${search_name},g")" current_sed="$(echo "${apse_sed[$i]}" | sed -e "s,\${search_name},${search_name},g")"
notify.debug "\ if [ "${apse_grep[$i]}" ]; then
grep -i -m1 \"$current_grep\" \$fcurlout | sed -n \"$current_sed\"" current_grep="$(echo "${apse_grep[$i]}" | sed -e "s,\${search_name},${search_name},g")"
new_version=$(grep -i -m1 "$current_grep" $fcurlout | sed -n "$current_sed") notify.debug "\
running: grep -i -m1 \"$current_grep\" \$fcurlout | sed -n \"$current_sed\""
new_version=$(grep -i -m1 "$current_grep" $fcurlout | sed -n "$current_sed")
else
notify.debug "\
running: sed -n \"$current_sed\" \$fcurlout"
new_version=$(sed -n "$current_sed" $fcurlout)
fi
if [ "$new_version" ]; then if [ "$new_version" ]; then
notify.note "\ notify.note "\
* "$"found version:"" \`${NOTE}$new_version${NORM}'" * "$"found version:"" \`${NOTE}$new_version${NORM}'"
@ -496,7 +521,11 @@ grep -i -m1 \"$current_grep\" \$fcurlout | sed -n \"$current_sed\""
else else
last_version="$new_version" last_version="$new_version"
fi fi
else
notify.debug "$FUNCNAME: no new version found in current site"
fi fi
else
notify.debug "$FUNCNAME: no new version found in current site"
fi fi
done done
done done