#include "tmbstr.h"
#include "httpio.h"

/*
 * Resolve pHttp->pHostName and connect a TCP stream socket to it on
 * pHttp->nPort.  The socket is stored in pHttp->s.
 *
 * Returns 0 on success, ECONNREFUSED when connect() fails with that errno,
 * and -1 for every other failure.  When connect() fails the socket is
 * closed again and pHttp->s is reset to -1 so it does not leak.
 */
int makeConnection( HTTPInputSource *pHttp )
{
    struct sockaddr_in sock;
    struct hostent *pHost;

    /* Get internet address of the host. */
    if (!(pHost = gethostbyname( pHttp->pHostName )))
        return -1;

    /* Never copy more address bytes than sin_addr can hold. */
    if (pHost->h_length <= 0 || pHost->h_length > (int) sizeof( sock.sin_addr ))
        return -1;

    /* Start from an all-zero address so no padding field holds stack garbage. */
    memset( &sock, 0, sizeof( sock ) );

    /* Copy the address of the host to socket description. */
    memcpy( &sock.sin_addr, pHost->h_addr, pHost->h_length );

    /* Set port and protocol */
    sock.sin_family = AF_INET;
    sock.sin_port = htons( pHttp->nPort );

    /* Make an internet socket, stream type. */
    if ((pHttp->s = socket( AF_INET, SOCK_STREAM, 0 )) == -1)
        return -1;

    /* Connect the socket to the remote host. */
    if (connect( pHttp->s, (struct sockaddr *) &sock, sizeof( sock ) ))
    {
        /* Save errno first: closing the socket may overwrite it. */
        int connectErr = errno;

        closesocket( pHttp->s );
        pHttp->s = -1;

        return (connectErr == ECONNREFUSED) ? ECONNREFUSED : -1;
    }

    return 0;
}

/*
 * Split url into host name, port and resource and store them in
 * pHttp->pHostName, pHttp->nPort and pHttp->pResource.
 *
 * Accepted form: [http://]host[:port][/resource].  A scheme other than
 * http, an empty host, or a port that is empty, non-numeric, zero or
 * greater than 65535 makes the function return -1.  Returns 0 on success.
 *
 * The whole URL is validated before anything is allocated, so a failed
 * parse leaves pHttp untouched.  The stored resource does not include the
 * leading slash(es); openURL adds one when it builds the request.
 *
 * NOTE(review): pHostName and pResource come from tmbstrndup/tmbstrdup and
 * nothing in this file releases them -- confirm who owns them.
 */
int parseURL( HTTPInputSource *pHttp, tmbstr url )
{
    int i, j = 0;
    int hostEnd;
    long port = 80;                 /* Assume default port. */
    ctmbstr pStr;

    pStr = tmbsubstr( url, "://" );

    /* If protocol is there, but not http, bail out, else assume http. */
    if (NULL != pStr)
    {
        if (tmbstrncasecmp( url, "http://", 7 ))
            return -1;
        j = (int)( pStr - url ) + 3;
    }

    /* The host name runs up to the first ':' or '/' (or end of string). */
    for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++)
        {}
    if (i == j)
        return -1;
    hostEnd = i;

    if (url[i] == ':')
    {
        /* We have a colon delimiting the hostname.  It should mean that
           a port number is following it */
        port = 0;
        i++;

        /* Anything but a digit right after the colon is malformed. */
        if (!isdigit( (unsigned char) url[i] ))
            return -1;

        for (; url[i] && url[i] != '/'; i++)
        {
            if (!isdigit( (unsigned char) url[i] ))
                return -1;
            port = 10 * port + (url[i] - '0');
            /* Checking on every digit keeps the accumulator from overflowing. */
            if (port > 65535)
                return -1;
        }
        if (port == 0)
            return -1;
    }

    /* skip past the delimiting slash (we'll add it later ) */
    while (url[i] == '/')
        i++;

    pHttp->nPort = port;
    pHttp->pHostName = tmbstrndup( &url[j], hostEnd - j );
    pHttp->pResource = tmbstrdup( url + i );
    return 0;
}

/*
 * Refill in->buffer with one recv() from in->s and rewind the read position.
 *
 * Returns the number of bytes now in the buffer.  A closed connection, a
 * recv() error and a socket value that fails the `0 < in->s` test all
 * yield 0, so callers see them as end of stream instead of a bogus size.
 * When the data does not fill the buffer it is NUL-terminated.
 */
int fillBuffer( HTTPInputSource *in )
{
    if (0 < in->s)
    {
        int received = recv( in->s, in->buffer, sizeof( in->buffer ), 0 );

        in->nextBytePos = 0;

        /* recv() reports errors as a negative value; never store that as a size. */
        if (received < 0)
            received = 0;
        in->nBufSize = received;

        if (received < (int) sizeof( in->buffer ))
            in->buffer[ received ] = '\0';
    }
    else
        in->nBufSize = 0;

    return in->nBufSize;
}

/*
 * Initialise *in as an input source for pUrl: install the byte callbacks,
 * parse the URL, connect, send a GET request and read past the response
 * header so that the next byte delivered is the first non-blank byte of
 * the body.
 *
 * Returns -1 when the URL cannot be parsed, when the request buffer cannot
 * be allocated or when sending the request fails; the non-zero result of
 * makeConnection() when connecting fails; otherwise the result of the last
 * fillBuffer() call made while skipping the header (the number of bytes
 * currently buffered, 0 if the peer closed before any further data).
 */
int openURL( HTTPInputSource *in, tmbstr pUrl )
{
    int rc = -1;
#ifdef WIN32
    WSADATA wsaData;

    /* 514 == 0x0202: both version bytes of the requested Winsock version are 2. */
    rc = WSAStartup( 514, &wsaData );
#endif

    in->tis.getByte    = (TidyGetByteFunc) HTTPGetByte;
    in->tis.ungetByte  = (TidyUngetByteFunc) HTTPUngetByte;
    in->tis.eof        = (TidyEOFFunc) HTTPIsEOF;
    /* NOTE(review): the pointer is squeezed into a uint here -- confirm this
       is wide enough on every target (it is not where pointers are 64-bit
       and uint is 32-bit). */
    in->tis.sourceData = (uint) in;
    in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;

    /* Known state, so a closeURL() after a failed open has nothing stale to act on. */
    in->s = -1;
    in->pHostName = NULL;
    in->pResource = NULL;

    /* Without a valid host and resource there is nothing to connect to. */
    if (0 != parseURL( in, pUrl ))
        return -1;

    if (0 == (rc = makeConnection( in )))
    {
        char ch, lastCh = '\0';
        int blanks = 0;
        int sent;

        /* The fixed part of the request is 37 characters; 48 leaves room for
           it and the terminating NUL. */
        char *getCmd = MemAlloc( 48 + strlen( in->pResource ) );
        if (NULL == getCmd)
            return -1;

        sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n",
                 in->pResource );
        sent = send( in->s, getCmd, strlen( getCmd ), 0 );
        MemFree( getCmd );
        if (sent < 0)
            return -1;

        /* skip past the header information */
        while ( in->nextBytePos >= in->nBufSize
                && 0 < (rc = fillBuffer( in )) )
        {
            /* The header ended exactly at the end of the previous buffer;
               the data just read is already body. */
            if (1 < blanks)
                break;

            /* Scan only the bytes actually received. */
            for ( ; in->nextBytePos < in->nBufSize; in->nextBytePos++ )
            {
                ch = in->buffer[ in->nextBytePos ];
                if (ch == '\r' || ch == '\n')
                {
                    if (ch == lastCh)
                    {
                        /* Two carriage returns or two newlines in a row,
                           that's good enough */
                        blanks++;
                    }
                    if (lastCh == '\r' || lastCh == '\n')
                    {
                        blanks++;
                    }
                }
                else
                    blanks = 0;
                lastCh = ch;

                if (1 < blanks)
                {
                    /* end of header, scan to first non-white and return */
                    while ( in->nextBytePos < in->nBufSize
                            && isspace( (unsigned char) in->buffer[ in->nextBytePos ] ) )
                        in->nextBytePos++;
                    break;
                }
            }
        }
    }
    return rc;
}

/*
 * Close the connection held by source (when source->s passes the
 * `0 < s` test), mark the socket as closed and clear the callback cookie.
 * On WIN32 this also calls WSACleanup().
 */
void closeURL( HTTPInputSource *source )
{
    if (0 < source->s)
        closesocket( source->s );
    source->s = -1;
    source->tis.sourceData = 0;
#ifdef WIN32
    WSACleanup();
#endif
}

/*
 * Return the next byte of the stream, or EndOfStream when no more data is
 * available.  Pushed-back bytes are delivered first, most recent first.
 *
 * Bytes are returned as unsigned char values (0..255) so that bytes >= 0x80
 * never come back negative on platforms where plain char is signed.
 */
int HTTPGetByte( HTTPInputSource *source )
{
    if (source->nextUnGotBytePos)
        return (unsigned char) source->unGetBuffer[ --source->nextUnGotBytePos ];

    /* Buffer consumed: try to read more. */
    if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
    {
        fillBuffer( source );
    }

    if (0 == source->nBufSize)
        return EndOfStream;

    return (unsigned char) source->buffer[ source->nextBytePos++ ];
}

/*
 * Push byteValue back so that the next HTTPGetByte() returns it.
 * At most 16 bytes can be pending; further pushes are silently dropped.
 * NOTE(review): 16 is presumably the size of unGetBuffer -- confirm against
 * the declaration of HTTPInputSource.
 */
void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
{
    if (source->nextUnGotBytePos < 16 )  /* Only you can prevent buffer overflows */
        source->unGetBuffer[ source->nextUnGotBytePos++ ] = (char) byteValue;
}

/*
 * Report whether the stream is exhausted.  Returns no while pushed-back
 * or buffered bytes remain (refilling the buffer when it has been fully
 * consumed), yes otherwise.
 */
Bool HTTPIsEOF( HTTPInputSource *source )
{
    if (source->nextUnGotBytePos)
        /* pending ungot bytes, not done */
        return no;

    if ( 0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
        /* We've consumed the existing buffer, get another */
        fillBuffer( source );

    if (source->nextBytePos < source->nBufSize)
        /* we have stuff in the buffer, must not be done. */
        return no;

    /* Nothing in the buffer, and the last receive failed, must be done. */
    return yes;
}