diff --git a/README.md b/README.md index 78f0b85..a049ebd 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,7 @@ # libwww +Libwww is a general-purpose Web API written in C for Unix and Windows (Win32). +With a highly extensible and layered API, it can accommodate many different types of applications including clients, robots, etc. +The purpose of libwww is to provide a highly optimized HTTP sample implementation as well as other Internet protocols and to serve as a testbed for protocol experiments. +Libwww also supports HTTPS, thru OpenSSL. + diff --git a/libwww-5.4.0-can_2005_3183.patch b/libwww-5.4.0-can_2005_3183.patch new file mode 100644 index 0000000..8178242 --- /dev/null +++ b/libwww-5.4.0-can_2005_3183.patch @@ -0,0 +1,534 @@ +--- w3c-libwww-5.4.0/Library/src/HTBound.c.htfix 1999-02-22 23:10:10.000000000 +0100 ++++ w3c-libwww-5.4.0/Library/src/HTBound.c 2005-09-30 12:50:50.000000000 +0200 +@@ -3,7 +3,7 @@ + ** + ** (c) COPYRIGHT MIT 1995. + ** Please first read the full copyright statement in the file COPYRIGH. +-** @(#) $Id: HTBound.c,v 2.14 1999/02/22 22:10:10 frystyk Exp $ ++** @(#) HTBound.c,v 1.2 2005/06/06 15:22:10 mrsam Exp + ** + ** This stream parses a MIME multipart stream and builds a set of new + ** streams via the stream stack each time we encounter a boundary start. +@@ -11,9 +11,12 @@ + ** + ** Authors + ** HF Henrik Frystyk ++** SV Sam Varshavchik + ** + ** History: + ** Nov 95 Written from scratch ++** SV Jun 05 Rewrote HTBoundary_put_block. Fixed many bugs+segfaults. ++** SV Jul 05 Fix double-counting of processed bytes. + ** + */ + +@@ -23,104 +26,395 @@ + #include "WWWCore.h" + #include "HTMerge.h" + #include "HTReqMan.h" ++#include "HTNetMan.h" ++#include "HTChannl.h" + #include "HTBound.h" /* Implemented here */ + +-#define PUTBLOCK(b, l) (*me->target->isa->put_block)(me->target, b, l) ++#define PUTBLOCK(b, l) (me->target ? 
(*me->target->isa->put_block)(me->target, b, l):HT_OK) ++ + #define PUTDEBUG(b, l) (*me->debug->isa->put_block)(me->debug, b, l) + #define FREE_TARGET (*me->target->isa->_free)(me->target) + + struct _HTStream { + const HTStreamClass * isa; ++ HTNet * net; + HTStream * target; + HTStream * orig_target; + HTFormat format; + HTStream * debug; /* For preamble and epilog */ + HTRequest * request; +- BOOL body; /* Body or preamble|epilog */ +- HTEOLState state; +- int dash; /* Number of dashes */ + char * boundary; +- char * bpos; ++ ++ BOOL keptcrlf; ++ int (*state)(HTStream *, const char *, int); ++ ++ char *boundary_ptr; ++ + }; + ++PRIVATE int HTBoundary_flush (HTStream * me); ++ + /* ------------------------------------------------------------------------- */ + ++PRIVATE int start_of_line (HTStream * me, const char * b, int l); ++PRIVATE int seen_dash (HTStream * me, const char * b, int l); ++PRIVATE int seen_doubledash (HTStream * me, const char * b, int l); ++PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l); ++PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l); ++PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l); ++PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l); ++PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l); ++PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra); ++PRIVATE int seen_nothing(HTStream * me, const char * b, int l); ++PRIVATE int seen_cr(HTStream * me, const char * b, int l); ++PRIVATE void process_boundary(HTStream *me, int isterminal); ++ ++#define UNUSED(l) (l=l) /* Shut up about unused variables */ ++ + PRIVATE int HTBoundary_put_block (HTStream * me, const char * b, int l) + { +- const char *start = b; +- const char *end = b; +- while (l-- > 0) { +- if (me->state == EOL_FCR) { +- me->state = (*b == LF) ? 
EOL_FLF : EOL_BEGIN; +- } else if (me->state == EOL_FLF) { +- if (me->dash == 2) { +- while (l>0 && *me->bpos && *me->bpos==*b) l--, me->bpos++, b++; +- if (!*me->bpos) { +- HTTRACE(STREAM_TRACE, "Boundary.... `%s\' found\n" _ me->boundary); +- me->bpos = me->boundary; +- me->body = YES; +- me->state = EOL_DOT; +- } else if (l>0) { +- me->dash = 0; +- me->bpos = me->boundary; +- me->state = EOL_BEGIN; +- } +- } +- if (*b == '-') { +- me->dash++; +- } else if (*b != CR && *b != LF) { +- me->dash = 0; +- me->state = EOL_BEGIN; +- } +- } else if (me->state == EOL_SLF) { /* Look for closing '--' */ +- if (me->dash == 4) { +- if (end > start) { +- int status = PUTBLOCK(start, end-start); +- if (status != HT_OK) return status; ++ /* ++ ** The HTBoundary object gets attached downstream of HTMime. ++ ** The HTBoundary object creates another HTMime object downstream of ++ ** the HTBoundary object. ++ ** ++ ** When we push data downstream to the second HTBoundary object, it ++ ** updates the bytes read count in the HTNet object. ++ ** ++ ** When we return to the parent HTMime object, it updates the ++ ** bytes read count in the HTNet object again. Oops. ++ ** ++ ** Same thing happens with the consumed byte count. We can prevent ++ ** the consumed byte counts from being updated by temporarily setting ++ ** the input channel stream pointer to NULL, but for the byte counts ++ ** we have to save them and restore them before exiting. ++ ** ++ ** This bug was discovered by chance when a multipart/partial response ++ ** was partially received, and as a result of double-counting the ++ ** real response got cut off (because HTMime thought that more bytes ++ ** were processed than actually were, thus it processed only the ++ ** partial count of the remaining bytes in the response). When the ++ ** multipart/partial response was received all at once this bug did ++ ** not get triggered. 
++ */ ++ ++ HTHost *host=HTNet_host(me->net); ++ HTChannel *c=HTHost_channel(host); ++ HTInputStream *i=HTChannel_input(c); ++ ++ long saveBytesRead=HTNet_bytesRead(me->net); ++ long saveHeaderBytesRead=HTNet_headerBytesRead(me->net); ++ ++ if (i) ++ HTChannel_setInput(c, NULL); ++ ++ HTTRACE(STREAM_TRACE, "Boundary: processing %d bytes\n" _ l); ++ /* Main loop consumes all input */ ++ ++ while (l) ++ { ++ int n= (*me->state)(me, b, l); ++ ++ if (n == 0) ++ return HT_ERROR; ++ b += n; ++ l -= n; ++ } ++ ++ if (i) ++ HTChannel_setInput(c, i); ++ HTNet_setBytesRead(me->net, saveBytesRead); ++ HTNet_setHeaderBytesRead(me->net, saveHeaderBytesRead); ++ ++ return HT_OK; ++} ++ ++/* ++** Start of line, keptcrlf=YES if we've kept the preceding CRLF from downstream ++** and we'll pass it along if we decide that this is not a boundary delimiter. ++*/ ++ ++PRIVATE int start_of_line (HTStream * me, const char * b, int l) ++{ ++ if (*b != '-') ++ return not_delimiter(me, b, l, 0); ++ ++ HTTRACE(STREAM_TRACE, "Boundary: start of line: input '-'\n"); ++ ++ me->state= seen_dash; ++ ++ return 1; ++} ++ ++/* ++** Line: - ++*/ ++ ++PRIVATE int seen_dash (HTStream * me, const char * b, int l) ++{ ++ if (*b != '-') ++ return not_delimiter(me, b, l, 1); ++ ++ HTTRACE(STREAM_TRACE, "Boundary: start of line: input '--'\n"); ++ ++ me->state= seen_doubledash; ++ me->boundary_ptr=me->boundary; ++ return 1; ++} ++ ++/* ++** Line: -- ++*/ ++ ++PRIVATE int seen_doubledash (HTStream * me, const char * b, int l) ++{ ++ me->state=seen_doubledash; ++ ++ if (*me->boundary_ptr) ++ { ++ if (*b != *me->boundary_ptr) ++ { ++ return not_delimiter(me, b, l, ++ me->boundary_ptr - me->boundary ++ + 2); + } +- HTTRACE(STREAM_TRACE, "Boundary.... 
Ending\n"); +- start = b; +- me->dash = 0; +- me->state = EOL_BEGIN; +- } +- if (*b == '-') { +- me->dash++; +- } else if (*b != CR && *b != LF) { +- me->dash = 0; +- me->state = EOL_BEGIN; +- } +- me->body = NO; +- } else if (me->state == EOL_DOT) { +- int status; +- if (me->body) { +- if (me->target) FREE_TARGET; ++ ++me->boundary_ptr; ++ return 1; ++ } ++ ++ /* ++ ** Line: --delimiter ++ */ ++ ++ if (*b == '-') ++ { ++ HTTRACE(STREAM_TRACE, ++ "Boundary: start of line: input '--%s-'\n" ++ _ me->boundary); ++ ++ me->state=seen_delimiter_dash; ++ return 1; ++ } ++ ++ HTTRACE(STREAM_TRACE, ++ "Boundary: Found: '--%s'\n" _ me->boundary); ++ ++ return seen_delimiter_nonterminal(me, b, l); ++} ++ ++/* ++** Line: --delimiter ++** ++** Waiting for CRLF. ++*/ ++ ++ ++PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l) ++{ ++ UNUSED(l); ++ ++ me->state=seen_delimiter_nonterminal; ++ if (*b == CR) ++ me->state=seen_delimiter_nonterminal_CR; ++ ++ return 1; ++} ++ ++/* ++** Line: --delimiter ++*/ ++ ++PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l) ++{ ++ HTTRACE(STREAM_TRACE, ++ "Boundary: Found: '--%s'\n" _ me->boundary); ++ ++ if (*b != LF) ++ return seen_delimiter_nonterminal(me, b, l); ++ ++ HTTRACE(STREAM_TRACE, ++ "Boundary: Found: '--%s'\n" _ me->boundary); ++ ++ process_boundary(me, NO); ++ return 1; ++} ++ ++/* ++** Line: --delimiter- ++*/ ++ ++PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l) ++{ ++ if (*b != '-') ++ return seen_delimiter_nonterminal(me, b, l); ++ ++ HTTRACE(STREAM_TRACE, ++ "Boundary: start of line: input '--%s--'\n" ++ _ me->boundary); ++ ++ me->state=seen_delimiter_terminal; ++ return 1; ++} ++ ++/* ++** Line: --delimiter-- ++*/ ++ ++PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l) ++{ ++ UNUSED(l); ++ ++ me->state=seen_delimiter_terminal; ++ ++ if (*b == CR) ++ me->state=seen_delimiter_terminal_CR; ++ return 1; ++} ++/* ++** Line: 
--delimiter-- ++*/ ++ ++PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l) ++{ ++ HTTRACE(STREAM_TRACE, ++ "Boundary: Found '--%s--'\n" ++ _ me->boundary); ++ ++ if (*b != LF) ++ return seen_delimiter_terminal(me, b, l); ++ HTTRACE(STREAM_TRACE, ++ "Boundary: Found '--%s--'\n" ++ _ me->boundary); ++ ++ process_boundary(me, YES); ++ return 1; ++} ++ ++/* ++** Beginning of the line does not contain a delimiter. ++** ++** ++** extra: Count of characters in a partially matched delimiter. Since it's ++** not a delimiter this is content that needs to go downstream. ++*/ ++ ++PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra) ++{ ++ HTTRACE(STREAM_TRACE, "Boundary: not a delimiter line\n"); ++ ++ if (me->keptcrlf) ++ { ++ HTTRACE(STREAM_TRACE, "Boundary: Sending previous line's \n"); ++ /* ++ ** Did not process CRLF from previous line, because prev CRLF ++ ** is considered a part of the delimiter. See MIME RFC. ++ */ ++ ++ me->keptcrlf=NO; ++ if (PUTBLOCK("\r\n", 2) != HT_OK) ++ return 0; ++ } ++ ++ /* ++ ** Potentially matched some of: --DELIMITER ++ */ ++ ++ if (extra) ++ { ++ HTTRACE(STREAM_TRACE, "Boundary: Sending partially-matched %d characters\n" _ extra); ++ ++ if (PUTBLOCK("--", extra > 2 ? 2:extra) != HT_OK) ++ return 0; ++ ++ if (extra > 2) ++ if (PUTBLOCK(me->boundary, extra-2) != HT_OK) ++ return 0; ++ } ++ return seen_nothing(me, b, l); ++} ++ ++/* ++** We're not looking for a delimiter. Look for the next line of input ++** in the data that could potentially be a delimiter. ++*/ ++ ++PRIVATE int seen_nothing(HTStream * me, const char * b, int l) ++{ ++ int i; ++ ++ me->state=seen_nothing; ++ ++ for (i=0; i 4 && ++ strncmp(b+i, "\r\n--", 4)) ++ continue; ++ break; ++ } ++ ++ if (i == 0) ++ { ++ /* Could only be a CR here. 
*/ ++ ++ me->state=seen_cr; ++ return 1; ++ } ++ ++ HTTRACE(STREAM_TRACE, "Boundary: Processed %d (out of %d) bytes\n" ++ _ i _ l); ++ ++ if (PUTBLOCK(b, i) != HT_OK) ++ return 0; ++ ++ return i; ++} ++ ++/* ++** State: seen a CR ++*/ ++ ++PRIVATE int seen_cr(HTStream * me, const char * b, int l) ++{ ++ HTTRACE(STREAM_TRACE, "Boundary: Processed \n"); ++ ++ if (*b != LF) ++ { ++ HTTRACE(STREAM_TRACE, "Boundary: ... didn't follow\n"); ++ if (PUTBLOCK("\r", 1) != HT_OK) ++ return 0; ++ return seen_nothing(me, b, l); ++ } ++ ++ HTTRACE(STREAM_TRACE, "Boundary: Processed \n"); ++ me->state=start_of_line; ++ me->keptcrlf=YES; ++ return 1; ++} ++ ++PRIVATE void process_boundary(HTStream *me, int isterminal) ++{ ++ HTBoundary_flush(me); ++ if (me->target) FREE_TARGET; ++ me->target=NULL; ++ me->state=start_of_line; ++ me->keptcrlf=NO; ++ ++ if (!isterminal) + me->target = HTStreamStack(WWW_MIME,me->format, + HTMerge(me->orig_target, 2), + me->request, YES); +- if (end > start) { +- if ((status = PUTBLOCK(start, end-start)) != HT_OK) +- return status; +- } +- } else { +- if (me->debug) +- if ((status = PUTDEBUG(start, end-start)) != HT_OK) +- return status; +- } +- start = b; +- if (*b == '-') me->dash++; +- me->state = EOL_SLF; +- } else if (*b == CR) { +- me->state = EOL_FCR; +- end = b; +- } else if (*b == LF) { +- if (me->state != EOL_FCR) end = b; +- me->state = EOL_FLF; +- } +- b++; +- } +- return (startbody) ? 
PUTBLOCK(start, b-start) : HT_OK; + } + ++ + PRIVATE int HTBoundary_put_string (HTStream * me, const char * s) + { + return HTBoundary_put_block(me, s, (int) strlen(s)); +@@ -133,7 +427,9 @@ + + PRIVATE int HTBoundary_flush (HTStream * me) + { +- return (*me->target->isa->flush)(me->target); ++ if (me->target == NULL) ++ return HT_OK; ++ return (*me->target->isa->flush)(me->target); + } + + PRIVATE int HTBoundary_free (HTStream * me) +@@ -182,18 +478,26 @@ + HTResponse_formatParam(response) : + HTAnchor_formatParam(anchor); + char * boundary = HTAssocList_findObject(type_param, "boundary"); ++ ++ UNUSED(param); ++ UNUSED(input_format); ++ + if (boundary) { + HTStream * me; + if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL) + HT_OUTOFMEM("HTBoundary"); + me->isa = &HTBoundaryClass; +- me->request = request; ++ me->net = HTRequest_net(request); ++ me->request = request; + me->format = output_format; + me->orig_target = output_stream; + me->debug = HTRequest_debugStream(request); +- me->state = EOL_FLF; ++ ++ me->state = start_of_line; ++ me->keptcrlf=NO; ++ + StrAllocCopy(me->boundary, boundary); /* Local copy */ +- me->bpos = me->boundary; ++ + HTTRACE(STREAM_TRACE, "Boundary.... 
Stream created with boundary '%s\'\n" _ me->boundary); + return me; + } else { diff --git a/libwww-5.4.0-incdir.patch b/libwww-5.4.0-incdir.patch new file mode 100644 index 0000000..17e948f --- /dev/null +++ b/libwww-5.4.0-incdir.patch @@ -0,0 +1,15 @@ +--- w3c-libwww-5.3.2/libwww-config.in.inc Wed Feb 20 11:39:50 2002 ++++ w3c-libwww-5.3.2/libwww-config.in Wed Feb 20 11:42:17 2002 +@@ -45,7 +45,11 @@ + echo @VERSION@ + ;; + --cflags) +- echo -I@includedir@ -I@includedir@/@PACKAGE@ @DEFS@ ++ if [ "@includedir@" != "@prefix@/include" ]; then ++ echo -I@includedir@ -I@includedir@/@PACKAGE@ @DEFS@ ++ else ++ echo -I@includedir@/@PACKAGE@ @DEFS@ ++ fi + ;; + --libs) + echo -L@libdir@ @LWWWXML@ @LWWWZIP@ @LWWWWAIS@ @LWWWSQL@ -lwwwinit -lwwwapp -lwwwhtml -lwwwtelnet -lwwwnews -lwwwhttp -lwwwmime -lwwwgopher -lwwwftp -lwwwfile -lwwwdir -lwwwcache -lwwwstream -lwwwmux -lwwwtrans -lwwwcore -lwwwutils @LWWWMD5@ @LIBS@ diff --git a/libwww-5.4.0-libtool.patch b/libwww-5.4.0-libtool.patch new file mode 100644 index 0000000..6153341 --- /dev/null +++ b/libwww-5.4.0-libtool.patch @@ -0,0 +1,29 @@ +--- w3c-libwww-5.4.0/config/ltmain.sh.orig 2005-11-14 12:41:58.000000000 +0100 ++++ w3c-libwww-5.4.0/config/ltmain.sh 2005-11-14 12:42:51.000000000 +0100 +@@ -469,7 +469,7 @@ + pic_mode=default + ;; + esac +- if test $pic_mode = no && test "$deplibs_check_method" != pass_all; then ++ if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi +@@ -2464,7 +2464,7 @@ + ;; + *) + # Add libc to deplibs on all other systems if necessary. 
+- if test $build_libtool_need_lc = "yes"; then ++ if test "$build_libtool_need_lc" = "yes"; then + deplibs="$deplibs -lc" + fi + ;; +@@ -2756,7 +2756,7 @@ + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then +- if test $hardcode_into_libs = yes; then ++ if test "$hardcode_into_libs" = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= diff --git a/libwww.spec b/libwww.spec new file mode 100644 index 0000000..5af0e36 --- /dev/null +++ b/libwww.spec @@ -0,0 +1,123 @@ +Name: libwww +Version: 5.4.0 +Release: 5mamba +Summary: General-purpose Web API written in C +Group: System/Libraries +Vendor: openmamba +Distribution: openmamba +Packager: Silvan Calarco +URL: http://www.w3.org/Library/ +Source: http://www.w3.org/Library/Distribution/w3c-%{name}-%{version}.tgz +Patch0: %{name}-5.4.0-can_2005_3183.patch +Patch1: %{name}-5.4.0-libtool.patch +Patch2: %{name}-5.4.0-incdir.patch +License: W3C License +## AUTOBUILDREQ-BEGIN +BuildRequires: glibc-devel +BuildRequires: libopenssl-devel +BuildRequires: libz-devel +## AUTOBUILDREQ-END +BuildRequires: libncurses-devel >= 5.4 +BuildRoot: %{_tmppath}/%{name}-%{version}-root + +# checking whether to support mysql access.... no + +%description +Libwww is a general-purpose Web API written in C for Unix and Windows (Win32). +With a highly extensible and layered API, it can accommodate many different types of applications including clients, robots, etc. +The purpose of libwww is to provide a highly optimized HTTP sample implementation as well as other Internet protocols and to serve as a testbed for protocol experiments. +Libwww also supports HTTPS, thru OpenSSL. + +%package apps +Group: Applications/Web +Summary: Applications built using Libwww web library +Requires: %{name} = %{version}-%{release} + +%description apps +Web applications built using Libwww: Robot, Command line tool, line mode browser. 
+The Robot can crawl web sites faster, and with lower load, than any other web walker that we know of, due to its extensive pipelining and use of HTTP/1.1. +The command line tool (w3c) is very useful for manipulation of Web sites that implement more than just HTTP GET (e.g. PUT, POST, etc.). +The line mode browser is a minimal line mode web browser; often useful to convert to ASCII text. + +%package devel +Group: Development/Libraries +Summary: Static libraries and headers for %{name} +Requires: %{name} = %{version}-%{release} + +%description devel +Libwww is a general-purpose Web API written in C for Unix and Windows (Win32). +With a highly extensible and layered API, it can accommodate many different types of applications including clients, robots, etc. +The purpose of libwww is to provide a highly optimized HTTP sample implementation as well as other Internet protocols and to serve as a testbed for protocol experiments. +Libwww also supports HTTPS, thru OpenSSL. + +This package contains static libraries and header files needed for development. 
+ +%prep +%setup -q -n w3c-%{name}-%{version} +%patch0 -p1 +%patch1 -p1 +%patch2 -p1 + +%build +%configure \ + --enable-shared \ + --with-gnu-ld \ + --with-zlib \ + --with-regex \ + --with-md5 \ + --with-dav \ + --with-ssl \ + --with-extension + +%make -j1 + +%install +[ "%{buildroot}" != / ] && rm -rf "%{buildroot}" +%makeinstall + +chmod +x %{buildroot}%{_libdir}/lib{www*,xml*,md5}.so.0.* + +%clean +[ "%{buildroot}" != / ] && rm -rf "%{buildroot}" + +%post -p /sbin/ldconfig +%postun -p /sbin/ldconfig + +%files +%defattr(-,root,root) +%{_libdir}/*.so.* +%{_datadir}/w3c-libwww/* + +%files apps +%defattr(-,root,root) +%{_bindir}/w3c +%{_bindir}/webbot +%{_bindir}/www + +%files devel +%defattr(-,root,root) +%{_bindir}/libwww-config +%{_includedir}/*.h +%dir %{_includedir}/w3c-libwww/ +%{_includedir}/w3c-libwww/*.h +%{_libdir}/*.a +%{_libdir}/*.la +%{_libdir}/*.so +%doc *.html + +%changelog +* Mon Jan 03 2011 Silvan Calarco 5.4.0-5mamba +- rebuilt with openssl 1.0.0 + +* Thu Jun 12 2008 Silvan Calarco 5.4.0-4mamba +- specfile updated + +* Mon Jan 09 2006 Stefano Cotta Ramusino 5.4.0-3qilnx +- fixed package group and license + +* Fri Nov 11 2005 Davide Madrisan 5.4.0-2qilnx +- security update for CAN-2005-3183 (QiLinux bug#68) +- enabled support for OpenSSL + +* Thu Dec 02 2004 Davide Madrisan 5.4.0-1qilnx +- package created by autospec