For more info, see http://www.xs4all.nl/~johnpc/inn

Review notes on this copy of the patch (text in front of the first file
header is ignored by patch(1)):

 1. Line breaks and indentation had been lost and are reconstructed here;
    every hunk matches the line counts given in its "@@" header.  The exact
    blanks and tabs of context lines cannot be recovered, so apply the
    patch with "patch -l".
 2. The header names after the two "# include" directives were missing.
    They are restored as <sys/types.h> and <sys/mman.h> (caddr_t, mmap(),
    munmap()).  This is a reconstruction; confirm against an original copy.
 3. YOUR_MMAP_SUCKS is wrapped in do { } while (0) so that it always
    behaves as one statement.
 4. ARTgetfield(): the field length test compared p instead of next with
    eol, so the last field of a line extended into the following line.
 5. OVERfind(): the length handed to memchr() when resyncing the cache was
    OVERsize - (OVERshm - OVERcache), which is larger than the mapping;
    it is now OVERsize - (OVERcache - OVERshm).
 6. OVERfind(): memrchr(--top, '\n', top - OVERshm), and the same call with
    pos, changed and read the pointer in one unsequenced expression.  Both
    now pass "top - 1" / "pos - 1" and leave the pointer untouched until
    the assignment.
 7. OVERfind(): the "ASSERTION FAILED" syslog() printed ARTNUM values with
    %d while the rest of the file formats article numbers with %ld; the
    arguments are now cast to long and printed with %ld.
 8. memrchr(): the length parameter is a size_t instead of an int, because
    the callers pass size_t values and pointer differences.

--- article.c 1997/02/08 14:19:13 1.1
+++ article.c 1997/03/19 12:12:10
@@ -1,4 +1,4 @@
-/* $Revision: 1.1 $
+/* $Revision: 1.39 $
 **
 ** Article-related routines.
 */
@@ -8,6 +8,19 @@
 #include "clibrary.h"
 #include "nnrpd.h"
 
+/*
+ * OVERSCREAM - to make the overview database screaming fast, and because
+ * I scream in terror about the previous implementation.
+ * See http://www.xs4all.nl/~johnpc/inn/ for more information on this patch.
+ */
+
+#define OVERSCREAM
+
+#ifdef OVERSCREAM
+# include <sys/types.h>
+# include <sys/mman.h>
+#endif /* OVERSCREAM */
+
 
 /*
 ** Data structures for use in ARTICLE/HEAD/BODY/STAT common code.
@@ -61,11 +74,33 @@
 /*
 ** Overview state information.
 */
+#ifdef OVERSCREAM
+
+STATIC caddr_t OVERshm = (caddr_t) NULL; /* location of mmap */
+STATIC size_t OVERsize; /* size of mmap */
+STATIC size_t OVERmsize; /* real size of mmap */
+STATIC int OVERfd; /* fd of file */
+STATIC ARTNUM OVERfirst, OVERlast; /* first/last entries */
+STATIC int OVERopens; /* Number of opens done */
+STATIC char* OVERcache; /* cached position */
+STATIC ARTNUM OVERprev; /* previous found art */
+#define LINSEARCH 5 /* linear search range */
+#define MIDSKEW 0.1 /* 10% bias toward middle */
+
+STATIC int mmapsuck; /* did we syslog already */
+#define YOUR_MMAP_SUCKS do { if ( ! mmapsuck++ ) \
+    syslog(L_NOTICE, "Your mmap() implementation sucks."); } while (0)
+
+#else /* !OVERSCREAM */
+
 STATIC QIOSTATE *OVERqp; /* Open overview file */
 STATIC char *OVERline; /* Current line */
 STATIC ARTNUM OVERarticle; /* Current article */
 STATIC int OVERopens; /* Number of opens done */
 
+#endif
+
+
 
 /*
 ** Read the overview schema.
@@ -676,6 +711,10 @@
     register int i;
     ARTOVERFIELD *fp;
     char *next;
+#ifdef OVERSCREAM
+    char* eol = strchr(p, '\n');
+#endif
+
 
     fp = &ARTfields[field - 1];
 
@@ -683,8 +722,13 @@
         field = ARTfirstfullfield;
 
     /* Skip leading headers. */
-    for (; --field >= 0 && *p; p++)
+    for (; --field >= 0 && *p && *p != '\n'; p++)
+#ifdef OVERSCREAM
+        if ((p = memchr(p, '\t', OVERsize - (p - OVERshm))) == NULL ||
+                p > eol )
+#else
         if ((p = strchr(p, '\t')) == NULL)
+#endif
             return NULL;
     if (*p == '\0')
         return NULL;
@@ -702,10 +746,22 @@
     }
 
     /* Figure out length; get space. */
+
+#ifdef OVERSCREAM
+    if ((next = memchr(p, '\t', OVERsize - (p - OVERshm))) != NULL &&
+            next < eol )
+        i = next - p;
+    else
+        i = eol - p;
+
+#else /* !OVERSCREAM */
+
     if ((next = strchr(p, '\t')) != NULL)
         i = next - p;
     else
         i = strlen(p);
+#endif
+
     if (buffsize == 0) {
         buffsize = i;
         buff = NEW(char, buffsize + 1);
@@ -720,6 +776,430 @@
     return buff;
 }
 
+#ifdef OVERSCREAM
+
+/*
+ * helper function, search backwards in memory
+ */
+
+STATIC char*
+memrchr(p, c, l)
+    register char* p;
+    register char c;
+    register size_t l;
+{
+    for (; l--; --p)
+        if ( *p == c )
+            return(p);
+    return(NULL);
+}
+
+/*
+ * mmap an OVERVIEW file.
+ */
+
+STATIC BOOL
+OVERopen()
+{
+    char name[SPOOLNAMEBUFF];
+    struct stat sb;
+    char* p;
+    static int pagesize = 0;
+
+    /* return true if already mapped */
+    if ( OVERshm ) {
+        return TRUE;
+    }
+    /* return false if already failed */
+    if ( OVERopens++ ) {
+        return FALSE;
+    }
+    /* get memory pagesize if we don't have it already */
+    if ( ! pagesize && !
+#ifdef _SC_PAGE_SIZE
+        (pagesize = sysconf(_SC_PAGE_SIZE))
+#else
+# ifdef _SC_PAGESIZE
+        (pagesize = sysconf(_SC_PAGESIZE))
+# else
+        (pagesize = getpagesize())
+# endif
+#endif
+        ) {
+        syslog(L_NOTICE, "%s: Can't getpagesize", ClientHost);
+        return FALSE;
+    }
+    /* mmap the file */
+    (void)sprintf(name, "%s/%s/%s", _PATH_OVERVIEWDIR, GRPlast, _PATH_OVERVIEW);
+    if ( (OVERfd = open(name, O_RDONLY)) < 0 ) {
+        /* no overview file */
+        syslog(L_NOTICE, "%s can't open %s: %m", ClientHost, name);
+        return FALSE;
+    }
+    if ( fstat(OVERfd, &sb) == -1 ) {
+        syslog(L_NOTICE, "%s can't stat %s: %m", ClientHost, name);
+        (void)close(OVERfd);
+        return FALSE;
+    }
+    if ( (OVERsize = sb.st_size) <= 1 ) {
+        syslog(L_NOTICE, "%s: %s is too small", ClientHost, name);
+        (void)close(OVERfd);
+        return FALSE;
+    }
+    OVERmsize = (OVERsize + pagesize - 1) & ~(pagesize - 1);
+    if ( (OVERshm = mmap(NULL, OVERmsize, PROT_READ, MAP_SHARED, OVERfd, 0))
+                == (caddr_t) -1 )
+    {
+        syslog(L_NOTICE, "%s can't mmap %s: %m", ClientHost, name);
+        (void)close(OVERfd);
+        OVERshm = NULL;
+        return FALSE;
+    }
+    /* get first entry */
+    if ( (OVERfirst = atol((char*) OVERshm)) == 0 ) {
+        syslog(L_NOTICE, "%s: %s: bad format", ClientHost, name);
+        (void)munmap(OVERshm, OVERmsize);
+        (void)close(OVERfd);
+        OVERshm = NULL;
+        return FALSE;
+    }
+
+    /* get last entry */
+    if ( *(OVERshm + OVERsize - 1) != '\n' ) {
+        /*
+         * If you get here, then your mmap() implementation sucks.
+         * Go complain with your OS vendor, that their mmap() can't
+         * do mmap()ing of growing files properly.
+         * We try to find a decent record near the end, for the poor
+         * sobs without proper mmap. There are a lot of other places
+         * in the code with hacks for bad mmap(). Mainly because I'm
+         * one of the poor sobs :(
+         */
+        YOUR_MMAP_SUCKS;
+    }
+    do {
+        /*
+         * Try to find any newline. If there isn't any, the entire file
+         * is crap. Normally this finds the newline right at the end.
+         */
+        p = memrchr(OVERshm + OVERsize - 1, '\n', OVERsize - 1);
+        if ( p == NULL ) {
+            /* overview file only contains garbage. */
+            (void)munmap(OVERshm, OVERmsize);
+            (void)close(OVERfd);
+            OVERshm = NULL;
+            return FALSE;
+        }
+        OVERsize = p - OVERshm + 1;
+        if ( (p = memrchr((char*) OVERshm + OVERsize - 2, '\n',
+                OVERsize - 2)) == NULL )
+        {
+            /* Apparently only 1 (usable) line */
+            OVERlast = OVERfirst;
+            OVERcache = NULL;
+            return TRUE;
+        }
+        OVERlast = atol(p+1);
+    }
+    while ( OVERlast == 0 && --OVERsize );
+
+    if ( !OVERsize ) {
+        (void)munmap(OVERshm, OVERmsize);
+        (void)close(OVERfd);
+        OVERshm = NULL;
+        return FALSE;
+    }
+
+    OVERcache = NULL;
+    return TRUE;
+}
+
+/*
+ * Close an overview file, if any.
+ */
+
+void
+OVERclose()
+{
+    if ( OVERshm ) {
+        if ( munmap(OVERshm, OVERmsize) == -1 ) {
+            syslog(L_NOTICE, "%s can't munmap: %m", ClientHost);
+        }
+        (void)close(OVERfd);
+        OVERshm = NULL;
+    }
+    OVERopens = 0;
+}
+
+/*
+ * find an overview article using binary search in the overview file.
+ * Returns a pointer to the actual line in the overview file (so it's
+ * !!NOT!! null terminated, and can't be written to!!), or NULL on failure.
+ */
+
+STATIC char*
+OVERfind(artnum)
+    ARTNUM artnum;
+{
+    char* bottom;
+    char* top;
+    ARTNUM bottomnr;
+    ARTNUM topnr;
+    char* pos;
+    ARTNUM nr;
+    int i;
+
+    /* default startpos */
+    bottom = OVERshm;
+    bottomnr = OVERfirst;
+    top = OVERshm + OVERsize - 1;
+    topnr = OVERlast;
+
+    if ( OVERcache ) {
+        /*
+         * for speedy sequential access. OVERcache, if non-NULL, points to
+         * the "next" entry. OVERprev is the previous article number found.
+         * Also check for sucking mmap() implementations.
+         */
+        if ( *OVERcache == '\0' ) {
+            YOUR_MMAP_SUCKS;
+            OVERcache = memchr(OVERcache, '\n',
+                OVERsize - (OVERcache - OVERshm));
+            if ( OVERcache == NULL || OVERcache == OVERshm + OVERsize - 1 ) {
+                OVERcache = NULL;
+                return NULL;
+            }
+            OVERcache++;
+        }
+        nr = atol(OVERcache);
+        if ( nr < OVERfirst || nr > OVERlast ) {
+            /* boo */
+            OVERcache = NULL;
+            return NULL;
+        }
+        if ( nr == artnum ) {
+            pos = OVERcache;
+            goto bingo; /* calculate next OVERcache + return. (EW! a goto! :) */
+        }
+        else if ( artnum > nr ) {
+            /* treat cache as first binary search */
+            bottom = OVERcache;
+            bottomnr = nr;
+        }
+        else {
+            /* cache is first top */
+            top = OVERcache - 1;
+            topnr = nr - 1;
+            if ( artnum > OVERprev ) {
+                /*
+                 * optimization: we're searching for something that isn't
+                 * in the database, but we want to keep the cache clean.
+                 * this occurs when we think an article is there, but it
+                 * really isn't, eg. because NOSCANDIR is on, or simply
+                 * because the overview database leaks.
+                 */
+                return(NULL);
+            }
+        }
+    }
+
+    /* sanity check */
+    if ( artnum < bottomnr || artnum > topnr ) {
+        OVERcache = NULL;
+        return NULL;
+    }
+
+    for (;;) {
+        /*
+         * This is the binary search loop, there are about a zillion
+         * exits so I found it neater to code it in an endless loop :)
+         * It simply continues until it is either found or it isn't...
+         *
+         * Note that we don't do a real binary search, but we guess
+         * a position using the fact that the overview database usually
+         * contains a reasonably linear range of articles, without any
+         * big leaps, but we skew it a bit towards the middle to prevent
+         * slow convergence in boundary cases (see also below).
+         *
+         * We switch to linear searching when we're "close",
+         * because on short ranges, linear searches are about as fast
+         * (or faster) anyway. LINSEARCH is currently guessed at 5,
+         * because on average it takes 2.5 searches using a linear search,
+         * where it usually takes 3 "straight" binary searches.
+         *
+         * Unfortunately, we can't be sure we get into linear search when
+         * we're close, because the database may have large holes.
+         */
+        /* test if it's near the bottom */
+        if ( artnum < bottomnr + LINSEARCH ) {
+            i = 0;
+            while ( artnum > bottomnr && i++ < LINSEARCH ) {
+                /* search next line */
+                bottom = memchr(bottom, '\n', OVERsize - (bottom - OVERshm));
+                if ( bottom == NULL || bottom == top + 1 ) {
+                    /* reached end of file */
+                    OVERcache = NULL;
+                    return NULL;
+                }
+                if ( *++bottom == 0 ) {
+                    YOUR_MMAP_SUCKS;
+                    continue;
+                }
+                bottomnr = atol(bottom);
+                if ( bottomnr < OVERfirst || bottomnr > OVERlast ) {
+                    OVERcache = NULL;
+                    return NULL;
+                }
+            }
+            if ( artnum == bottomnr ) {
+                pos = bottom;
+                goto bingo; /* calculate next OVERcache + return. */
+            }
+            else {
+                /* didn't find it, but we came close. still cache position */
+                OVERcache = bottom;
+                OVERprev = artnum;
+                return NULL;
+            }
+            /*NOTREACHED*/
+        }
+        /* test if it's near the top */
+        if ( artnum > topnr - LINSEARCH ) {
+            /*
+             * topnr is frequently guessed, so we must first determine it
+             * correctly. The fun part about searching backwards is that
+             * the next position (OVERcache) follows easily...
+             */
+            i = 0;
+            do {
+                OVERcache = (top == OVERshm + OVERsize - 1) ? NULL : top + 1;
+                if ( (top = memrchr(top - 1, '\n', top - OVERshm))
+                     == NULL || top + 1 == bottom )
+                {
+                    /* search hit bottom */
+                    OVERcache = NULL;
+                    return NULL;
+                }
+                if ( *(top + 1) == 0 ) {
+                    YOUR_MMAP_SUCKS;
+                    /* make sure we continue */
+                    topnr = artnum + 1;
+                    continue;
+                }
+                topnr = atol(top + 1);
+                if ( topnr < OVERfirst || topnr > OVERlast ) {
+                    OVERcache = NULL;
+                    return NULL;
+                }
+            }
+            while ( artnum < topnr && i++ < LINSEARCH );
+            if ( artnum == topnr ) {
+                /* bingo. This time we know OVERcache already */
+                OVERprev = artnum;
+                return(top + 1);
+            }
+            else {
+                /* not found, but close. cache position */
+                OVERprev = artnum;
+                return NULL;
+            }
+            /*NOTREACHED*/
+        }
+
+        /*
+         * now for the real binary search:
+         * Estimate the position of artnum, but with a small offset towards
+         * the middle, for better convergence in case the set of articles
+         * is non-linear (you get a straight binary search if MIDSKEW is 1.0).
+         * MIDSKEW is currently determined using a big thumb, occultism,
+         * astrology, cat /dev/uri-geller and some common sense (but not much)
+         * MIDSKEW == 0.0 makes the search take only 1 iteration in case
+         * the overview database is a monotonous array of lines with equal
+         * length, but can make for really lousy searches in anything not like
+         * the above, which, in the real world, is practically always.
+         * MIDSKEW == 1.0 gives you a true binary search without any guessing
+         * whatsoever.
+         * I thought 10% would be good enough. Only rigid testing can
+         * determine the optimal value, and then it still depends on a lot
+         * of settings, like expire times, user newsgroups preference,
+         * presence of cancelbots or cancelwars, frequency of expireover
+         * runs... need I say more? :)
+         */
+        if ( topnr <= bottomnr ) {
+            /* Safety net. This REALLY should never happen. */
+            syslog(L_NOTICE,
+                "%s: ASSERTION FAILED: %ld < %ld looking for %ld in %s",
+                ClientHost, (long) topnr, (long) bottomnr, (long) artnum, GRPlast);
+        }
+        pos = bottom + (int) ((double) (top - bottom) * (MIDSKEW * 0.5) +
+                (top - bottom) * (1.0 - MIDSKEW) *
+                (artnum - bottomnr) / (topnr - bottomnr));
+        /* search forward for newline */
+        if ( (pos = memchr(pos, '\n', OVERsize - (pos - OVERshm))) == NULL ) {
+            /* this shouldn't happen */
+            OVERcache = NULL;
+            return NULL;
+        }
+        if ( pos == top ) {
+            /* hmm... */
+            if ( (pos = memrchr(pos - 1, '\n', pos - OVERshm))
+                 == NULL || pos == bottom - 1 )
+            {
+                /*
+                 * This is what happens when there's a large hole and we're
+                 * looking for something inside the hole (which isn't there).
+                 * still record the position in this case...
+                 */
+                OVERcache = (top == OVERshm + OVERsize - 1) ? NULL : top + 1;
+                OVERprev = artnum;
+                return NULL;
+            }
+        }
+        /* see where we are */
+        if ( *++pos == 0 ) {
+            YOUR_MMAP_SUCKS;
+            pos = memchr(pos, '\n', OVERsize - (pos - OVERshm));
+            if ( pos == NULL || pos == OVERshm + OVERsize - 1 || pos == top ) {
+                OVERcache = NULL;
+                return NULL;
+            }
+            pos++;
+        }
+        nr = atol(pos);
+        if ( nr < OVERfirst || nr > OVERlast ) {
+            OVERcache = NULL;
+            return NULL;
+        }
+        if ( nr == artnum ) {
+            /* bingo. Set cache to next entry */
+bingo:
+            OVERcache = memchr(pos, '\n', OVERsize - (pos - OVERshm));
+            if ( OVERcache == OVERshm + OVERsize - 1 )
+                OVERcache = NULL;
+            else if ( OVERcache )
+                OVERcache++;
+            OVERprev = artnum;
+            return (pos);
+        }
+        if ( nr < artnum ) {
+            /* found a new bottom */
+            bottom = pos;
+            bottomnr = nr;
+        }
+        else /* nr > artnum */ {
+            /*
+             * found a new top. Setting topnr to nr-1 is not entirely
+             * correct, but who cares. (In fact we do care, but adjust
+             * later :)
+             */
+            top = pos - 1;
+            topnr = nr - 1;
+        }
+    }
+    /*NOTREACHED*/
+}
+
+#else /* !OVERSCREAM */
 
 /*
 ** Open an OVERVIEW file.
@@ -756,7 +1236,7 @@
     if (OVERqp != NULL) {
         QIOclose(OVERqp);
         OVERqp = NULL;
-        OVERopens = 0;
+        OVERopens = 0; /* this is a bug */
     }
 }
 
@@ -789,6 +1269,8 @@
     return OVERarticle == artnum ? OVERline : NULL;
 }
 
+#endif
+
 
 /*
 ** Read an article and create an overview line without the trailing
@@ -1078,12 +1560,22 @@
         if (ARTfind(i) < 0)
             continue;
 
+        /*OVERVIEWcount++;*/
         if (Opened && (p = OVERfind(i)) != NULL) {
+#ifdef OVERSCREAM
+            char* eol = memchr(p, '\n', OVERsize - (p - OVERshm));
+            if ( eol == NULL )
+                continue; /* this should NEVER NEVER EVER NEVER EVER happen */
+            fwrite(p, 1, eol - p, stdout);
+            fwrite("\r\n", 1, 2, stdout);
+#else
             Printf("%s\r\n", p);
+#endif
             continue;
         }
 
         (void)sprintf(buff, "%ld", i);
+        /*OVERGENcount++;*/
         if ((p = OVERgen(buff)) != NULL)
             Printf("%s\r\n", p);
     }