libapse.lib,conf/03-apse.conf: fix scraping values and add new sites

Signed-off-by: Stefano Cotta Ramusino <stefano.cotta@openmamba.org>
This commit is contained in:
Stefano Cotta Ramusino 2012-07-04 03:28:22 +02:00
parent b856cd2959
commit 6f23a0da8e
2 changed files with 138 additions and 81 deletions

View File

@ -1,43 +1,71 @@
## APSE (Autospec Package Search Engine) arrays -------------------------------
#
# Per-site scraping settings, one set of parallel bash arrays keyed by a
# numeric site index.  They are consumed by libapse.lib as follows:
#
#   apse_enabled[i]     "0" makes the library skip site i (it logs a
#                       "skipping <site name>" debug message instead).
#   apse_site_name[i]   label printed in the "looking at" note and in the
#                       "skipping" debug message.
#   apse_uri[i]         presumably the base URL the search name is combined
#                       with -- its use is not visible in the library code
#                       reviewed alongside this file; TODO confirm.
#   apse_put_fields[i]  when non-empty, it is sent, immediately followed by
#                       the search name, as `curl -d' data; when empty a
#                       plain `curl -s -L' request is made.
#   apse_error_msg[i]   regex embedded in `sed -n "s,.*<msg>.*,error,pi"'; if
#                       it matches the fetched page no version is extracted.
#                       Because `,' is the sed delimiter there, an unescaped
#                       comma cannot be used in the message.
#   apse_grep[i]        optional; when non-empty, `grep -i -m1' selects the
#                       first matching line and only that line is fed to
#                       apse_sed.  When empty, apse_sed runs on the whole
#                       page.
#   apse_sed[i]         `sed -n' script whose output is taken as the new
#                       version string.
#
# The literal text \${search_name} inside apse_grep / apse_sed is a
# placeholder: the library replaces it with the current search name before
# running grep/sed, which is why the `$' is escaped here.
#
# Indices 0 and 1 are handled by dedicated code in the library (the generic
# per-site loop skips every index <= 1); in the library code visible for
# review only their apse_site_name and apse_enabled entries are read.
#
# NOTE(review): several indices are assigned more than once in this listing
# (e.g. apse_site_name[0], apse_error_msg[2], apse_sed[2]); in bash the later
# assignment wins.  This looks like old and new revisions shown together --
# confirm against the actual file before relying on any single value.
apse_site_name[0]="http://fileforum.betanews.com"
apse_uri[0]="http://fileforum.betanews.com/search?search_advanced=Search+FileForum&os%5B%5D=12&alphabeta=R&title="
apse_put_fields[0]=""
apse_error_msg[0]="No results found\. Please try another search\."
apse_grep[0]=""
apse_sed[0]="s,.*<span>1.</span> <a href=[^ ]*>\${search_name}[^<]*[ ]\([^ <]*\)</a></h1>.*,\1,pi"
apse_enabled[0]=1
apse_site_name[0]="source0"
apse_site_name[1]="http://linux.softpedia.com"
apse_uri[1]="http://linux.softpedia.com/progSearch"
apse_put_fields[1]="src="
apse_error_msg[1]="No results found"
apse_grep[1]="<b[^<]*\${search_name}[^a-zA-Z]"
apse_sed[1]="s,.*<b[^<]*\${search_name}[^<]*[ ]\([^ <]*\)</b>.*,\1,pi"
apse_enabled[1]=1
apse_site_name[1]="http://sourceforge.net"
apse_site_name[2]="http://www.gnomefiles.org"
apse_uri[2]="http://www.gnomefiles.org/app.php/"
apse_enabled[2]=1
apse_site_name[2]="http://fileforum.betanews.com"
apse_uri[2]="http://fileforum.betanews.com/search?search_advanced=Search+Fileforum&os%5B%5D=12&alphabeta=R&title="
apse_put_fields[2]=""
apse_error_msg[2]="We are currently making some improvements"
apse_error_msg[2]="No releases "
apse_grep[2]=""
apse_sed[2]="s,.*<B>version \([^ <]*\)</b>.*,\1,pi"
apse_sed[2]="s,.*<p><a href=\"http://fileforum.betanews.com/detail/[^ ]*>\${search_name}[^<]*[ ]\([^ <]*\)</a></p>.*,\1,pi"
apse_site_name[3]="http://freshmeat.net"
apse_uri[3]="http://freshmeat.net/projects/"
apse_put_fields[3]=""
apse_error_msg[3]="We encountered an error"
apse_grep[3]="/releases/"
apse_sed[3]="s,.*<a href=\"\/releases\/[0-9]*\/\">\([^ <]*\)</a>.*,\1,pi"
apse_enabled[3]=1
apse_site_name[3]="http://linux.softpedia.com"
apse_uri[3]="http://linux.softpedia.com/progFinder"
apse_put_fields[3]="search_term="
apse_error_msg[3]="No results "
apse_grep[3]="<h2><a[^<]*\${search_name}[^a-zA-Z]"
apse_sed[3]="s,.*<h2><a[^<]*\${search_name}[^<]*[ ]\([^ <]*\)</a>.*,\1,pi"
apse_site_name[4]="http://sourcewell.berlios.de"
apse_uri[4]="http://sourcewell.berlios.de/appsearch.php?by=Importance&search="
apse_put_fields[4]=""
apse_error_msg[4]="No Application found"
apse_grep[4]="appbyid.*\${search_name}[^a-zA-Z].*Stable"
apse_sed[4]="s,.*<a href=\"appbyid.*\">\${search_name} \([^ <]*\)</a>.*,\1,pi"
apse_enabled[4]=1
apse_site_name[4]="http://gnomefiles.org"
apse_uri[4]="http://gnomefiles.org/content/search.php"
apse_put_fields[4]="search=Search&text="
apse_error_msg[4]="No entries\. "
apse_grep[4]=""
apse_sed[4]="s,.*\${search_name}</span></a></h1>&nbsp;\([^&]*\)&nbsp;<.*,\1,pi"
apse_site_name[5]="http://distrowatch.com"
apse_uri[5]="http://distrowatch.com/packages.php?"
apse_enabled[5]=1
apse_site_name[5]="http://freecode.com"
apse_uri[5]="http://freecode.com/projects/"
apse_put_fields[5]=""
apse_error_msg[5]="is currently being redesigned"
apse_grep[5]="td.*/\${search_name}[-_][0-9]"
apse_sed[5]="s,.*td><[^>]*>\([^ <]*\)<.*,\1,pi"
apse_error_msg[5]="404 Not Found"
apse_grep[5]="/releases/"
apse_sed[5]="s,.*<a href=\"/projects/\${search_name}/releases/[0-9]*\">\([^ <]*\)</a>.*,\1,pi"
apse_enabled[6]=1
apse_site_name[6]="http://sourcewell.berlios.de"
apse_uri[6]="http://sourcewell.berlios.de/appsearch.php?by=Importance&search="
apse_put_fields[6]=""
apse_error_msg[6]="No Application found"
apse_grep[6]="appbyid.*\${search_name}[^a-zA-Z].*Stable"
apse_sed[6]="s,.*<a href=\"appbyid.*\">\${search_name} \([^ <]*\)</a>.*,\1,pi"
apse_enabled[7]=1
apse_site_name[7]="http://distrowatch.com"
apse_uri[7]="http://distrowatch.com/packages.php?"
apse_put_fields[7]=""
apse_error_msg[7]="is currently being redesigned"
apse_grep[7]="td.*/\${search_name}[-_][0-9]"
apse_sed[7]="s,.*td><[^>]*>\([^ <]*\)<.*,\1,pi"
apse_enabled[8]=1
apse_site_name[8]="http://directory.fsf.org"
apse_uri[8]="http://directory.fsf.org/wiki/"
apse_put_fields[8]=""
apse_error_msg[8]="is currently no text"
apse_grep[8]=""
apse_sed[8]="s,^version \(.*\),\1,pi"
apse_enabled[9]=1
apse_site_name[9]="http://launchpad.net"
apse_uri[9]="http://launchpad.net/"
apse_put_fields[9]=""
apse_error_msg[9]="page does not exist"
apse_grep[9]=""
apse_sed[9]="s,.*Latest version is \(.*\),\1,pi"

View File

@ -1,6 +1,6 @@
#!/bin/bash
# libapse.lib -- Autospec Package Search Engine library
# Copyright (C) 2007 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>
# Copyright (C) 2007,2012 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>
# Copyright (C) 2008-2011 Silvan Calarco <silvan.calarco@mambasoft.it>
[ -z "$BASH" ] || [ ${BASH_VERSION:0:1} -lt 2 ] &&
@ -47,7 +47,7 @@ function apse.cmpversion() {
version1 version2 minfield maxfield field1 field2 \
retval code1 code2 subarra1 position
let "position = 7"
let "position = 8"
vcs=(\
"cvs" "svn" "git" "rcs" "bzr" "mtn" "ae")
@ -62,8 +62,6 @@ function apse.cmpversion() {
precedence=(\
"prealpha[*]" "alpha" "beta[*]" "rc[*]" "delta" "omega" "stable[*]")
# TODO: if a code is not in this list, compare the codes alphabetically
# TODO: if both versions contain a vcs code, compare them; if only one of them does, look at the changelog and/or data
version1=($(echo $1 | tr '[:punct:]' ' ' | \
@ -160,6 +158,14 @@ $FUNCNAME: \`$code1' has position ${version1[$i]} in precedence"
elif [[ ${version2[$i]} -lt ${version1[$i]} ]]; then
retval=1
break
elif [ "${version2[$i]}" = "8" ]; then
if [ "$code2" \> "$code1" ]; then
retval=2
elif [ "$code2" \< "$code1" ]; then
retval=1
else
retval=0
fi
else
if [ $i -eq $(($minfield-1)) ]; then
for j in $(seq $i 1 $[$maxfield-1]); do
@ -206,7 +212,8 @@ function apse.scrapeversion() {
function usage() {
echo "\
$me, "$"version"" @version@""
Copyright (C) 2007 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>""
Copyright (C) 2007,2012 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>""
Copyright (C) 2008-2011 Silvan Calarco <silvan.calarco@mambasoft.it>""
"$"Find for updates scanning the web.""
@ -292,25 +299,26 @@ Copyright (C) 2007 Stefano Cotta Ramusino <stefano.cotta@openmamba.org>""
[ "$src0pckname" = "$specname" ] ||
{ src_name="$src0pckname"
notify.debug "src_name = \"$src_name\""; }
notify.debug "$FUNCNAME: src_name = \"$src_name\""; }
local curr_curl_url pck_file new_version last_version
local curr_curl_proxy_opts="\
${proxy:+ --proxy $proxy}${proxy_user:+ --proxy-user $proxy_user}"
# SOURCE0 url based search (only if 'pckurl' is a valid url)
[ "${apse_enabled[0]}" = "0" ] && notify.debug "$FUNCNAME: skipping ${apse_site_name[0]}" || \
if [[ "$pckurl" =~ .*://.* ]]; then
local pckurldir="`dirname $pckurl`/"
notify.debug "pckurldir = $pckurldir"
notify.debug "$FUNCNAME: pckurldir = $pckurldir"
local pcknameverbatim=`basename $pckurlverbatim`
notify.debug "pcknameverbatim = $pcknameverbatim"
notify.debug "$FUNCNAME: pcknameverbatim = $pcknameverbatim"
curr_curl_url="$pckurldir"
notify.note $"looking at"" <${NOTE}$curr_curl_url${NORM}> (\`${NOTE}source0${NORM}')..."
notify.note $"looking at"" ${NOTE}(#0)${NORM} <${NOTE}$curr_curl_url${NORM}> (\`${NOTE}source0${NORM}')..."
notify.debug "curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
notify.debug "running: curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curl_opts_netlink $curr_curl_proxy_opts -s -L "$curr_curl_url" > $fcurlout
let "retval = $?"
@ -349,7 +357,7 @@ ${proxy:+ --proxy $proxy}${proxy_user:+ --proxy-user $proxy_user}"
s,\.tbz2$,\\\.,")
local retval=$?
notify.debug "pcknameregexp = $pcknameregexp"
notify.debug "$FUNCNAME: pcknameregexp = $pcknameregexp"
if [ $retval -eq 0 ]; then
if [ "${pcknameregexp/(/}" = "${pcknameregexp}" ]; then
@ -392,43 +400,46 @@ sort -t. -n -r -k1 -k2 -k3 -k4 -k5 -k6 -k7 -k8 -k9 -k10 ))
if [ "$new_version" ]; then
notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'"
last_version="$new_version"
else
notify.debug "$FUNCNAME: no new version found in current site"
fi
fi
# sourceforge.net, sf.net
if [ "${apse_enabled[1]}" = "1" ]; then
local prj_name
local prj_name
notify.note $"looking at"" <${NOTE}http://sourceforge.net${NORM}>..."
notify.note $"looking at"" ${NOTE}(#1)${NORM} <${NOTE}${apse_site_name[1]}${NORM}>..."
prj_name=$specname
[ "$prj_name" ] || notify.error $"\
prj_name=$specname
[ "$prj_name" ] || notify.error $"\
(bug)"" -- $FUNCNAME: "$"assertion failed:"" \"\$prj_name\" != \"\""
curr_curl_url="http://sourceforge.net/projects/$prj_name/files/latest"
notify.debug "curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curr_curl_proxy_opts -I -A Linux -s -L "$curr_curl_url" > $fcurlout
curr_curl_url="http://sourceforge.net/projects/$prj_name/files/latest"
notify.debug "running: curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curr_curl_proxy_opts -I -A Linux -s -L "$curr_curl_url" > $fcurlout
let "retval = $?"
case "$retval" in
0) ;;
6) notify.warning $"couldn't resolve host" ;;
7) notify.warning $"failed to connect to host" ;;
*) notify.warning $"curl error (exit code: $retval)" ;;
esac
let "retval = $?"
case "$retval" in
0) ;;
6) notify.warning $"couldn't resolve host" ;;
7) notify.warning $"failed to connect to host" ;;
*) notify.warning $"curl error (exit code: $retval)" ;;
esac
[ "$retval" = "0" ] &&
if [ "$(sed -n "s,.*Invalid Project.*,error,pi" $fcurlout)" != \
"error" ]; then
[ "$src_name" ] || src_name="$specname"
pck_file=`grep -i -m1 "Location:" $fcurlout | \
[ "$retval" = "0" ] &&
if [ "$(sed -n "s,.*Invalid Project.*,error,pi" $fcurlout)" != \
"error" ]; then
[ "$src_name" ] || src_name="$specname"
pck_file=`grep -i -m1 "Location:" $fcurlout | \
grep -i "$src_name" | \
sed -n "s,Location: \(.*\)/download,\1,pi"`
notify.debug "pck_file = \"$pck_file\""
[ "$src_name" ] && unset src_name
new_version=`echo $pck_file | sed "s|.*/\([0-9.]*\)/.*|\1|"`
[ "$new_version" ] || \
new_version=`echo $pck_file | sed -n "\
notify.debug "$FUNCNAME: pck_file = \"$pck_file\""
[ "$src_name" ] && unset src_name
new_version=`echo $pck_file | sed "s|.*/\([0-9.]*\)/.*|\1|"`
[ "$new_version" ] || \
new_version=`echo $pck_file | sed -n "\
/[0-9]/!q # return nothing if no number is found in the package name
s,.*/,, # remove directory name, if any
s/\.[^0-9].*// # remove trailing stuff (.tar.gz, ...)
@ -438,11 +449,16 @@ s|.*/\([0-9.]*\)/.*|\1|
/[^-\.][0-9]\./{s/.*[^-\.]\([0-9]\..*\)/\1/p;q} # <pck_name><pck_ver>
# <pck_name> (no version, but <pck_name> can end with numbers)
/^[^0-9]*[0-9]*$/q"`
fi
fi
if [ "$new_version" ]; then
notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'"
last_version="$new_version"
if [ "$new_version" ]; then
notify.note "* "$"found version:"" \`${NOTE}$new_version${NORM}'"
last_version="$new_version"
else
notify.debug "$FUNCNAME: no new version found in current site"
fi
else
notify.debug "$FUNCNAME: skipping ${apse_site_name[1]}"
fi
# look at the other supported web sites...
@ -450,8 +466,11 @@ s|.*/\([0-9.]*\)/.*|\1|
# skip perl packages to prevent name confusion; these are checked above via Source0
if [ ! "$(echo $pckurl | grep "cpan.org")" ]; then
for i in ${!apse_site_name[@]}; do
[ $i -le 1 ] && continue
[ "${apse_enabled[$i]}" = "0" ] && { notify.debug \
"$FUNCNAME: skipping ${apse_site_name[$i]}"; continue; }
notify.note \
$"looking at"" <${NOTE}${apse_site_name[$i]}${NORM}>..."
$"looking at"" ${NOTE}(#$i)${NORM} <${NOTE}${apse_site_name[$i]}${NORM}>..."
for search_name in $src_name $specname; do
[ "${apse_put_fields[$i]}" ] &&
@ -460,12 +479,12 @@ $"looking at"" <${NOTE}${apse_site_name[$i]}${NORM}>..."
if [ "${apse_put_fields[$i]}" ]; then
notify.debug "\
curl \$curr_curl_proxy_opts -s -L -F ${apse_put_fields[$i]}${search_name} \"$curr_curl_url\""
curl $curr_curl_proxy_opts -s -L -F \
${apse_put_fields[$i]}${search_name} "$curr_curl_url" > $fcurlout
running: curl \$curr_curl_proxy_opts -s -L -d \"${apse_put_fields[$i]}${search_name}\" \"$curr_curl_url\""
curl $curr_curl_proxy_opts -s -L -d \
"${apse_put_fields[$i]}${search_name}" "$curr_curl_url" > $fcurlout
else
notify.debug "\
curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
running: curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
curl $curr_curl_proxy_opts -s -L "$curr_curl_url" > $fcurlout
fi
@ -481,11 +500,17 @@ curl \$curr_curl_proxy_opts -s -L \"$curr_curl_url\""
if [ "$(cat $fcurlout | sed -n "\
s,.*${apse_error_msg[$i]}.*,error,pi")" != "error" ]; then
current_grep="$(echo "${apse_grep[$i]}" | sed -e "s,\${search_name},${search_name},g")"
current_sed="$(echo "${apse_sed[$i]}" | sed -e "s,\${search_name},${search_name},g")"
notify.debug "\
grep -i -m1 \"$current_grep\" \$fcurlout | sed -n \"$current_sed\""
new_version=$(grep -i -m1 "$current_grep" $fcurlout | sed -n "$current_sed")
if [ "${apse_grep[$i]}" ]; then
current_grep="$(echo "${apse_grep[$i]}" | sed -e "s,\${search_name},${search_name},g")"
notify.debug "\
running: grep -i -m1 \"$current_grep\" \$fcurlout | sed -n \"$current_sed\""
new_version=$(grep -i -m1 "$current_grep" $fcurlout | sed -n "$current_sed")
else
notify.debug "\
running: sed -n \"$current_sed\" \$fcurlout"
new_version=$(sed -n "$current_sed" $fcurlout)
fi
if [ "$new_version" ]; then
notify.note "\
* "$"found version:"" \`${NOTE}$new_version${NORM}'"
@ -496,7 +521,11 @@ grep -i -m1 \"$current_grep\" \$fcurlout | sed -n \"$current_sed\""
else
last_version="$new_version"
fi
else
notify.debug "$FUNCNAME: no new version found in current site"
fi
else
notify.debug "$FUNCNAME: no new version found in current site"
fi
done
done