/*
 * $Log: rewrite,v $
 * Revision 1.32  2000/02/11 18:51:42  joseph
 * Fixed bug with mailto: URLs getting rewritten
 *
 * Revision 1.31  1998/08/29 16:21:15  joseph
 * Unfetched URL's that aren't inline are left alone
 *
 * Revision 1.30  1998/08/23 19:26:03  joseph
 * URL prefix is now configurable
 * 'Cannot resolve' error message now include hostname
 * Bug with <a\nhref="..."> fixed
 *
 * Revision 1.29  1998/08/18 22:02:17  joseph
 * Changed to add a more informative message to the logfile if the error log
 * is empty
 *
 * Revision 1.28  1998/08/18 21:50:51  joseph
 * Removed debug_enable/disable's
 *
 * Revision 1.27  1998/08/18 21:20:28  joseph
 * Transferred logic for scanning url for tag to misc and made it more
 * tolerant of broken html (missing "s, etc)
 *
 * Revision 1.26  1998/08/16 15:13:56  joseph
 * Fixed rewriting of references to tags within the same page
 *
 * Revision 1.25  1998/08/16 14:10:52  joseph
 * Changed rewriting of urls to use relative urls
 *
 * Revision 1.24  1998/08/10 21:19:40  joseph
 * Fixed bug where <awiggle href= was interpreted as <a href=
 * Added <area ... href=""> tag for client side image maps
 *
 * Revision 1.23  1998/08/10 19:34:51  joseph
 * Allowed the http-equiv and content sections of a meta tag to be either
 * way round.
 *
 * Revision 1.22  1998/08/10 19:21:09  joseph
 * Added following / rewriting of <meta http-equiv="REFRESH"...> tag
 *
 * Revision 1.21  1998/07/01 21:10:58  joseph
 * Added status window
 * Fixed persistent connections
 * Fixed magtags to only count processed url references
 * Fixed proxy_forunqualified, again.
 * Fixed leading/trailing spaces in urls...
 *
 * Revision 1.20  1998/06/16 18:29:38  joseph
 * Added recalculation of fetch portion for base href & redirections
 * Added processing of base href in fetch and rewrite
 * Moved file_truncate from rewrite to file
 * Logging of urls with unknown schemes
 * Errorlog file kept open across whole fetch
 * Log routines may handle effects of *close better.
 * Invalid chars in urls and hostnames replaced with ~'s on filesystem
 * Rewrite recovers from zero length log file
 * Removed permfail, softfail states as not used
 * Removed some unused bits from url structures
 * Changed proxy code to handle dotless hostnames okay.
 * Version to 0.06a.
 *
 * Revision 1.19  1998/06/14 21:24:37  joseph
 * Moved relative url resolver from rewrite / http -> misc and rewrote
 * Fixed rewrite leaving []'s all over log file.
 * Fixed misc_urltofilename barfing on url's like http://wiggle
 * Lessened processor time when fetching pages with _lots_ of links in them.
 * Make FetchError open/close at start/end of fetch like FetchLog
 * Fixed handling of ftp urls, with & without proxy set.
 * Fixed bugs in code to set fetchportion
 * URLs now moved to fetch list when fetched (whether successful or not)
 * Log file now says '[http]' or '[ftp]' for real link, as appropriate.
 * Recalculates fetch portion for redirected urls
 * Fixed handling of proxy for ftp (wasn't always using proxy)
 * url.c now uses 'times 2' allocator / deallocator.
 * url_handled corrected for ftp (returns true only if proxy set)
 * closes log file when fetch aborted.
 *
 * Revision 1.18  1998/06/08 16:39:40  joseph
 * Upped number of lines processed per call
 *
 * Revision 1.17  1998/06/07 19:41:32  joseph
 * Fixed up proxy code so it actually works
 * Improved error handling
 * If a fetch is repeatedly aborted by the server during a download, we
 * now keep the resulting file and mark it as incomplete in the logfile.
 *
 * Revision 1.16  1998/06/05 22:55:17  joseph
 * URLs now moved to 'FETCHED' once they are fetched
 * Front end made proper
 * Added multiple fetch files + a menu in frontend
 * Fixed bugs with links containing url with spaces before them
 * Combined both makefilenames into one in misc.c
 * Added readtag and openfetch to http.
 * Keep FetchLog open for duration of fetch
 * Logfile now contains links to the original pages too
 * Added url_handled, returns true if the url is one we know how to fetch
 * Added shift to fetch without rewriting, and adjust to dump all flex
 * lists to files. (#define URLDUMP)
 * Frontend animation now updates every 0.5 secs, and has new rewrite icon
 * Added stopping / aborting of fetch / rewrite
 *
 * Revision 1.15  1998/05/29 16:49:15  joseph
 * Disabled if-modified and made changes to try to decrease machine load
 *
 * Revision 1.14  1998/05/28 22:26:31  joseph
 * Altered to compile with debugging changes
 *
 * Revision 1.13  1998/05/28 19:41:11  joseph
 * Changed all printf()'s to debug_printf(())'s
 *
 * Revision 1.12  1998/05/27 22:36:46  joseph
 * Added initialisers to some variables
 *
 * Revision 1.11  1998/05/11 16:59:19  jogu
 * Fix for file: urls written out - should only be one initial /
 *
 * Revision 1.10  1998/05/07 16:11:42  jogu
 * Added ability to add comments to rewrite_addlog.
 * Added recognition of 304 (page unchanged), made to output suitable comment.
 *
 * Revision 1.9  1998/04/09 18:33:06  jogu
 * Fixed bug, wrong html output if fetcherror didn't exist
 *
 * Revision 1.8  1998/04/09 18:06:00  jogu
 * Added transfer of 'FetchError' into html log file
 *
 * Revision 1.7  1998/04/06 16:45:32  jogu
 * Added fetch/rewrite of <body background="url">
 * Fixed typos in fetchend code
 *
 * Revision 1.6  1998/04/05 16:11:41  jogu
 * Fixed up http_softerror() a bit, added processing of <frame src="..">
 *
 * Revision 1.5  1998/04/05 11:25:50  jogu
 * Changed to output html log file
 *
 * Revision 1.4  1998/03/25 17:59:02  jogu
 * Added #include "file.h"
 *
 * Revision 1.3  1998/03/21 20:32:54  jogu
 * Tidied up, adding preserving of dates.
 *
 * Revision 1.2  1998/03/12 20:11:49  jogu
 * Bug fix for references of form <a name="xx" href="aa">
 *
 * Revision 1.1.1.1  1997/12/29 14:37:55  jogu
 * WebGet Initial CMS Ver
 *
 */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <time.h>

#include "swis.h"

#include "defines.h"
#include "macros.h"
#include "config.h"
#include "my_string.h"
#include "file.h"
#include "version.h"
#include "debug.h"
#include "misc.h"
#include "webget.h"

#include "rewrite.h"
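/* Read in the fetch log.

   For each fetched HTML file:
    * Rename the file to _tmp in !WebGet.Sites
    * Open a new output file under the original file name
    * 'Copy' the text across, checking for links in the same way as the
      http fetcher and rewriting them
    * Delete _tmp
    * Add an entry to the pages list in the html log

  Finally, delete the fetch log
*/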
/* Name the original page is renamed to while its rewritten copy is produced */
static const char rewrite_tmpname[]=SITES_DIR"._tmp";
// static const char tmpname[]=SITES_DIR"._tmp";
/* The html log currently open for writing, or NULL when no log is open */
static FILE *rewrite_logf = NULL;



/* Writes to 'output' the replacement text for a link found in a page.
 *
 *  url         - URL of the page (or its <base href>) the link was found in.
 *                Leading/trailing white space is skipped/removed in place.
 *  reference   - the link text exactly as it appeared in the page.
 *  output      - stream the replacement link is written to.
 *  inline_kind - value reported by misc_scantag(); when it is inline_not the
 *                link is only rewritten if the local copy of its target exists.
 *                (The parameter used to be called 'inline', which is a keyword
 *                from C99 onwards.)
 *
 * The link is resolved against 'url'. If it maps to a local file it is written
 * as a path relative to the page's own local file, otherwise the resolved
 * absolute URL is written. Any '#fragment' is carried over unchanged.
 *
 * Returns 0 on success, or when the link could not be converted (nothing is
 * written in that case), and -1 only on an output error.
 */
static int rewrite_relativeurl(char *url, char *reference, FILE *output, eval_inline inline_kind)
{
  char result[URL_MAXLEN], *ans;
  char name[FILEPATH_MAXLEN];
  char basename[FILEPATH_MAXLEN];
  char *ptr;
  char *ref;      /* start of the '#fragment' in the resolved link, if any */
  char *baseref;  /* start of the '#fragment' in the base url, if any */
  int prefixlen = strlen( config_localurlprefix );
  int ret;

  ans = misc_resolverelativeurl( url, reference, result, sizeof(result) - 1 );
  if ( !ans ) return 0; /* not an i/o error */

  /* remove trailing and leading spaces (safe on an empty string) */
  while ( isspace( (unsigned char) *ans ) ) ans++;
  ptr = ans + strlen(ans);
  while ( ptr > ans && isspace( (unsigned char) ptr[-1] ) ) *--ptr = 0;

  ref = strchr( ans, '#' );
  if ( ref ) *ref = 0; /* don't rewrite the reference section! */

  memcpy( name, config_localurlprefix, prefixlen + 1 );

  ret = misc_urltofilename( ans, name + prefixlen, sizeof(name) - prefixlen );
  if (ret==-10)
  {
    if (ref) *ref = '#';
    /* either unrecognised scheme, or ftp scheme when not ftp proxy set, write out as is */
    if ( fwrite( ans, 1, strlen(ans), output ) != strlen( ans ) ) return -1;
    return 0;
  }
  else if (ret<0)
  {
    debug_printf(("Error forming filename from url '%s'\n",ans));
    return 0; /* not a i/o error */
  }
  if ( inline_kind == inline_not )
  {
    /* do we have the file? */
    if ( !file_exists( name + prefixlen ) )
    {
      /* put the fragment back - this must test 'ref', not 'ret' */
      if (ref) *ref = '#';
      /* file doesn't exist, leave url untouched (well, apart from making it absolute, anyway) */
      if ( fwrite( ans, 1, strlen(ans), output ) != strlen( ans ) ) return -1;
      return 0;
    }
  }

  /* swap '.' and '/' to turn the native filename into a URL path */
  for (ptr=name+prefixlen; *ptr; ptr++)
  {
    if (*ptr=='.') *ptr='/';
    else if (*ptr=='/') *ptr='.';
  }

  /* now convert base url */
  /* remove trailing and leading spaces */
  while ( isspace( (unsigned char) *url ) ) url++;
  ptr = url + strlen(url);
  while ( ptr > url && isspace( (unsigned char) ptr[-1] ) ) *--ptr = 0;

  /* hide the base url's reference section while its filename is formed,
   * then restore it so the caller's buffer is left as it was */
  baseref = strchr( url, '#' );
  if ( baseref ) *baseref = 0;

  memcpy( basename, config_localurlprefix, prefixlen + 1 );

  ret = misc_urltofilename( url, basename + prefixlen, sizeof(basename) - prefixlen );
  if ( baseref ) *baseref = '#';
  if (ret==-10)
  {
    if (ref) *ref = '#';
    /* either unrecognised scheme, or ftp scheme when not ftp proxy set, write out as is */
    if ( fwrite( ans, 1, strlen(ans), output ) != strlen( ans ) ) return -1;
    return 0;
  }
  else if (ret<0)
  {
    debug_printf(("Error forming filename from url '%s'\n",ans));
    return 0; /* not a i/o error */
  }

  for (ptr=basename+prefixlen; *ptr; ptr++)
  {
    if (*ptr=='.') *ptr='/';
    else if (*ptr=='/') *ptr='.';
  }

  /* we now have local file:/ URLs in name and basename */
  debug_printf(("Base filename : '%s'\n",basename ));
  debug_printf(("ref  filename : '%s'\n",name ));

  {
    char *x = basename, *y=name;
    char out[FILEPATH_MAXLEN], *o = out, *oend = out + sizeof out;
    char *scan;
    /* remove common prefix */
    while ( *x == *y && *x ) x++, y++;
    if ( *x )
    {
      /* back up to the start of the first path component that differs */
      while ( *x != '/' && x > basename ) x--, y--;
      if ( *x == '/' && *y == '/' ) x++, y++;
    }
    debug_printf(("Base differ : '%s'\n",x ));
    debug_printf(("ref  differ : '%s'\n",y ));

    /* work out relative reference. */
    /* move up from basename till we reach the common dir:
     * one "../" for every directory level left in the base path */
    scan = x;
    while ( o + 3 < oend && (scan = strchr( scan, '/' ), scan) )
      *o++ = '.', *o++ = '.', *o++ = '/', scan++;

    /* and now move down to the new url */
    *o = 0;
    strncat( o, y, sizeof out - strlen(out) - 1 );
    debug_printf(("Relative path is : '%s'\n", out ));
    if ( fwrite( out, 1, strlen(out), output ) != strlen( out ) )
      return -1;
  }

  if ( ref )
  {
    *ref = '#';
    if ( fwrite( ref, 1, strlen(ref), output ) != strlen(ref ) )
      return -1; /* write out reference if there was one */
  }

  return 0;
}


/* This procedure takes a tag and determines if it's one that should be rewritten or not.
 * If it can be rewritten, it is - otherwise, it's written to 'output', unchanged.
 */
static int rewrite_processtag( char *baseurl, char *tag, FILE *output, char *basehref_url, int basesize )
{
  char *lookfrom;
  char lastchar;
  int basehref = 0;
  eval_inline inline;

  lookfrom = misc_scantag( tag, strlen(tag), &basehref, &inline, &lastchar );
  /* the returns value of inline isn't actually used */

  if (!lookfrom || my_strncasecmp(lookfrom, "mailto:", sizeof("mailto:") - 1) == 0 ||
      my_strncasecmp(lookfrom, "javascript:", sizeof("javascript:") - 1) == 0)
  {
    if (lookfrom)
    {
      char *end = tag + strlen( tag );
      *end = lastchar; /* replace character after url (may be a 0) */
    }
    
    /* no matching tag, or js / mailto -  write out original */
    if ( fwrite(tag, 1, strlen(tag), output) != strlen(tag) ) return -1;
    return 0;
  }

  debug_printf(("Rewrite url : '%s'\n",lookfrom));

  /* write out section before url */
  if ( fwrite( tag, 1, lookfrom - tag, output ) != lookfrom-tag )
    return -1;

  /* if it's a <base href='....'>, record the url */
  if ( basehref )
  {
    *basehref_url = 0;
    strncat( basehref_url, lookfrom, basesize - 1 );
  }

  if ( *basehref_url && !basehref)
  {
    if ( rewrite_relativeurl( basehref_url, lookfrom, output, inline ) < 0 )
      return -1; /* only records < 0 on I/O error */
  }
  else
  {
    if ( rewrite_relativeurl( baseurl, lookfrom, output, inline ) < 0 )
      return -1; /* only records < 0 on I/O error */
  }

  {
    char *end = tag + strlen( tag );

    *end = lastchar; /* replace character after url (may be a 0) */

    /* write out anything after the url */
    if ( fwrite( end, 1, strlen(end), output ) != strlen( end ) )
      return -1;
  }

  return 0;
}


/* Copies the next chunk (up to 1024 bytes) of 'input' to 'output', passing
 * every complete <...> tag through rewrite_processtag() on the way.
 *
 * A tag may span several calls, so the partial tag is kept in static storage.
 * Calling with input == NULL discards that state (used when aborting).
 *
 *  baseurl            - URL of the page being copied.
 *  basehref, basesize - buffer handed on to rewrite_processtag() for
 *                       remembering a <base href>.
 *
 * Returns 1 while there is more to do, 0 when the input is exhausted (or the
 * state was reset), and -1 on a read or write error. The static tag state is
 * always cleared before returning 0 or -1 so it cannot leak into the next page.
 */
static int rewrite_checkfortags( char *baseurl, FILE *input, FILE *output, char *basehref, int basesize )
{
  static char tagbuff[1024] = "";
  static int intag=0;
  static char *tag = tagbuff;
  char buffer[1024];
  char *ptr;
  int amount = 0;

  if ( input ) amount = (int) fread(buffer, 1, sizeof(buffer), input);

  if ( !input || amount <= 0 )
  {
    int status = 0;

    if ( input )
    {
      /* a short read caused by an error is not a successful end of page */
      if ( ferror( input ) ) status = -1;

      /* the page ended inside a tag: its '<' has already been written, so
       * write the rest rather than silently losing it */
      if ( intag && output )
      {
        *tag = 0;
        if ( fwrite(tagbuff, 1, strlen(tagbuff), output) != strlen(tagbuff) ) status = -1;
      }
    }
    tag = tagbuff;
    *tag = 0;
    intag = 0;
    return status; /* finished */
  }

  for ( ptr = buffer; amount > 0; ptr++, amount-- )
  {
    if (*ptr=='<')
    {
      /* Start of tag. Write out any present one, stop copying to disc */
      if (intag)
      {
        *tag=0;
        if (fwrite(tagbuff, 1, strlen(tagbuff), output) != strlen(tagbuff)) goto fail;
      }
      tag = tagbuff;
      *tag=0;
      intag=1;
      if ( fputc('<', output) == EOF ) goto fail;
    }
    else if (*ptr=='>')
    {
      /* End of tag. Are we in a tag? Yes -> process*/
      if (intag)
      {
        *tag=0;
        if ( rewrite_processtag( baseurl, tagbuff, output, basehref, basesize ) < 0 ) goto fail;
        tag = tagbuff;
        *tag=0;
        intag=0;
      }
      if ( fputc('>', output) == EOF ) goto fail;
    }
    else if (intag)
    {
      if (tag >= (tagbuff+sizeof(tagbuff)-1) )
      {
        /* Buffer full. Write out what we have and give up on this tag. */
        *tag=0;
        debug_printf(("Filled tag buffer :-( (%.60s...)\n",tagbuff));
        if (fwrite(tagbuff, 1, strlen(tagbuff), output) < strlen(tagbuff)) goto fail;
        tag = tagbuff;
        *tag=0;
        intag=0;
        /* the character that did not fit must still reach the output */
        if ( fputc(*ptr, output) == EOF ) goto fail;
      }
      else
        *tag++=*ptr;
    }
    else
    {
      /* Nothing special. */
      if ( fputc(*ptr, output) == EOF ) goto fail;
    }
  }
  return 1; /* still working */

fail:
  /* forget the partial tag so it is not prepended to the next page */
  tag = tagbuff;
  *tag = 0;
  intag = 0;
  return -1;
}

/* Appends one <li> entry for a fetched URL to the open html log.
 *
 *  url     - the original URL; linked as "[scheme]" and used as the link text.
 *            It is only read, never modified.
 *  name    - native filename of the local copy. NB. Corrupted: every '.' and
 *            '/' in it is swapped in place to form the local URL.
 *  comment - optional note appended in square brackets, or NULL.
 *
 * Does nothing (apart from a debug message) if the log is not open.
 */
static void rewrite_addlog( const char *url, char *name, char *comment )
{
  char scheme[10] = ""; /* stays empty if the url has no ':' */
  const char *colon;
  char *ptr;

  if ( !rewrite_logf )
  {
    debug_printf((" Log file not open for append\n"));
    return;
  }

  /* copy (at most sizeof(scheme)-1 characters of) the text before the ':' */
  colon = strchr( url, ':' );
  if ( colon )
  {
    size_t len = (size_t) ( colon - url );
    if ( len > sizeof(scheme) - 1 ) len = sizeof(scheme) - 1;
    memcpy( scheme, url, len );
    scheme[len] = 0;
  }

  for ( ptr=name; *ptr; ptr++ )
  {
    if ( *ptr == '.' ) *ptr='/';
    else if ( *ptr == '/' ) *ptr='.';
  }
  if ( comment )
    fprintf( rewrite_logf, "  <li><a href=\"%s\">[%s]</a> <a href=\"%s%s\">%s</a> [%s]\n",
                           url, scheme, config_localurlprefix, name, url, comment );
  else
    fprintf( rewrite_logf, "  <li><a href=\"%s\">[%s]</a> <a href=\"%s%s\">%s</a>\n",
                           url, scheme, config_localurlprefix, name, url );
}

/* Writes _str, without its terminating 0, to stream _f and evaluates to
 * non-zero only if every byte was written. The length comes from sizeof, so
 * _str must be a string literal or char array, not a char pointer. */
#define F_WRITE( _str, _f ) ( fwrite( _str, 1, sizeof(_str)-1, _f ) == sizeof(_str)-1 )
/* Marker line written at the start of each fetch's section of the html log;
 * rewrite_preparelog() looks for it when re-opening an existing log. */
#define REWRITE_FETCHSTART "<!-- "TASK_NAME" fetch start -->\n"
/* Marker line that begins the footer; rewrite_preparelog() truncates an
 * existing log at this line before appending to it. */
#define REWRITE_FOOTERSTART "<!-- "TASK_NAME" footer start -->\n"
/* Closes the <ul> and <p> opened by rewrite_openlog() */
#define REWRITE_END "</ul>\n</p>\n"

/* Fixed text placed at the top of a newly created html log */
#define REWRITE_HTMLHEAD \
    "<!doctype html public \"-//W3C//DTD HTML 4.0//EN\">\n" \
    "<html lang=\"en\">\n" \
    "<head>\n" \
    "  <title>WebGet download log</title>\n" \
    "</head>\n" \
    "<body bgcolor=\"white\" text=\"black\">\n" \
    "<h1 align=\"center\">\n" \
    "  WebGet - Files downloaded\n" \
    "</h1>\n"

/* Footer (beginning with the footer marker) appended by rewrite_closelog().
 * It is written with F_WRITE, i.e. fwrite, so the '%' in it is literal. */
#define REWRITE_HTMLFOOT \
    REWRITE_FOOTERSTART \
    "<hr noshade width=\"80%\" align=\"right\">\n" \
    "<div align=\"right\"><address>\n" \
    "Produced by WebGet " VERSION ",  Joseph Heenan 1997-8 \n" \
    "<br><a href=\"mailto:webget@ping.demon.co.uk\">webget@ping.demon.co.uk</a>\n" \
    "</address></div>\n" \
    "</body>\n" \
    "</html>\n" \


/* Starts a brand new html log: creates the file, writes the standard page
 * header followed by the first fetch-start marker, and leaves the stream open
 * in rewrite_logf.
 *
 * Returns 0 on success; on failure reports the error and returns -1.
 */
static int rewrite_createlog( void )
{
  FILE *fresh;

  fresh = fopen( LOG_DIR ".Log", "w" );
  if ( fresh == NULL )
  {
    E_REPORT( "Could not create a new log file" );
    return -1;
  }

  if ( F_WRITE( REWRITE_HTMLHEAD REWRITE_FETCHSTART, fresh ) )
  {
    /* header written - hand the open stream over to the rest of the module */
    rewrite_logf = fresh;
    return 0;
  }

  fclose( fresh );
  E_REPORT( "Error writing to log file");
  return -1;
}



/* Gets the html log ready for a new fetch section and leaves it open in
 * rewrite_logf.
 *
 * If there is no log, a new one is created. Otherwise the existing log is
 * scanned for the footer marker (or, failing that, the last fetch-start
 * marker), truncated at that line, re-opened for append and a new fetch-start
 * marker written. A log containing neither marker is reported as corrupt and
 * replaced by a new one.
 *
 * Returns 0 on success, -1 on failure.
 */
static int rewrite_preparelog( void )
{
  FILE *out;
  char line[1024];
  long int lasttag = -1, curline;

  if ( ! file_exists( LOG_DIR ".Log" ) )
    return rewrite_createlog();

  /* There is a log file, find last tag we wrote in there */
  debug_printf(("Opening present log file\n"));

  out = fopen( LOG_DIR ".Log", "r" );
  if ( !out )
  {
    E_REPORT( "Could not open log file for reading" );
    return -1;
  }

  for (;;)
  {
    curline = ftell( out ); /* want to record the _start_ of the tag! */
    if ( curline == -1 ) break;
    if ( !fgets( line, sizeof line, out ) ) break;
    if ( strcmp( line, REWRITE_FETCHSTART) == 0 )
      lasttag = curline;
    else if ( strcmp( line, REWRITE_FOOTERSTART) == 0 )
    {
      /* everything from the footer onwards is regenerated, stop here */
      lasttag = curline;
      break;
    }
  }
  fclose( out );

  debug_printf(( "Last tag found at %ld\n", lasttag ));
  if ( lasttag == -1 )
  {
    E_REPORT("Corrupt log file (no tags found) - deleted!");
    return rewrite_createlog();
  }

  /* truncate log file to last tag */
  E_CHECK_RETURN( -1, file_truncate( LOG_DIR ".Log", lasttag ) );

  out = fopen( LOG_DIR ".Log", "a" );
  if ( !out )
  {
    /* must not hand a NULL stream to fwrite/fclose */
    E_REPORT( "Could not open log file for appending" );
    return -1;
  }
  if ( ! F_WRITE( REWRITE_FETCHSTART, out ) )
  {
    fclose( out );
    return -1;
  }
  rewrite_logf = out;

  return 0;
}

/* Opens the html log via rewrite_preparelog() and writes the preamble for this
 * fetch: a rule, the local date and time, and the opening <ul> that
 * rewrite_addlog() entries go into.
 *
 * Returns 0 with rewrite_logf open on success. On failure returns -1 and
 * rewrite_logf is closed and cleared if the preamble could not be written.
 */
static int rewrite_openlog( void )
{
  int fail;
  time_t now;
  struct tm *when;
  char fetchtime[32];

  if ( rewrite_preparelog() < 0 || !rewrite_logf )
    return -1;

  /* add a bit of preamble */
  now = time( NULL );
  when = localtime( &now );
  /* strftime leaves the buffer indeterminate when it returns 0, and
   * localtime may fail, so fall back to fixed text in either case */
  if ( !when || strftime( fetchtime, sizeof(fetchtime), "%x %H:%M", when ) == 0 )
    strcpy( fetchtime, "(time unknown)" );

  fail = fprintf( rewrite_logf, "<hr noshade width=\"80%%\" align=\"center\">\n"
    "<p>\n"
    "Fetched at %s\n"
    "<ul>\n", fetchtime) < 0;
  if ( fail )
  {
    fclose( rewrite_logf );
    rewrite_logf = NULL;
    return -1;
  }

  return 0;
}

/*
 * Appends the footer to the html log, closes it, and sets the file's type
 * to html.
 *
 * Returns 0 on success; -1 if the log was not open, the footer could not be
 * written, or closing the stream (which flushes buffered output) failed.
 * rewrite_logf is always NULL afterwards.
 */
static int rewrite_closelog( void )
{
  int wrote, closed;

  if ( !rewrite_logf ) return -1;

  wrote = F_WRITE( REWRITE_HTMLFOOT, rewrite_logf );
  closed = ( fclose( rewrite_logf ) == 0 );
  rewrite_logf = NULL;
  _swix(OS_File, _INR(0,2), 18, LOG_DIR ".Log", FILETYPE_HTML);

  return ( wrote && closed ) ? 0 : -1;
}


/* Looks through up to 25 lines of the fetch log for the next page that needs
 * rewriting.
 *
 * The URL is taken to be the text after the first four space-separated fields
 * of a line, followed by the http reply code and an optional comment. Entries
 * that need no rewriting (reply other than 200, filetype unreadable, or not
 * html) are added to the html log straight away via rewrite_addlog().
 *
 *  name, namesize - receive the local filename for the URL.
 *  url, urlsize   - receive the URL of the page to rewrite (emptied on entry).
 *
 * returns: <0 end of the fetch log. 0 = url to rewrite. >0 still working
 */
static int rewrite_readlog(FILE *logf, char *name, int namesize, char *url, int urlsize)
{
  char line[1024];
  char *field, *rest;
  int budget, spaces, code, filetype;
  int eof = 0;

  *url = 0;

  for ( budget = 25; budget > 0; budget-- )
  {
    if ( fgets( line, sizeof(line), logf ) == NULL )
    {
      eof = 1;
      break;
    }

    /* step over the first four fields, treating runs of spaces as one gap */
    field = line;
    spaces = 0;
    while ( spaces < 4 && *field )
    {
      if ( *field++ == ' ' )
      {
        spaces++;
        while ( *field == ' ' ) field++;
      }
    }

    /* the url must itself be followed by a space, else it can't be for us */
    if ( spaces == 0 ) continue;
    rest = strchr( field, ' ' );
    if ( rest == NULL ) continue;
    *rest = 0;
    debug_printf(("url = %s\n",field));

    if ( misc_urltofilename( field, name, namesize ) < 0 )
    {
      debug_printf(("Error from urltofilename!\n"));
      continue;
    }

    code = atoi( rest + 1 );
    if ( code != 200 )
    {
      /* nothing was fetched that could be rewritten - just log why */
      switch ( code )
      {
        case 301:
          rewrite_addlog( field, name, "Page moved - permanent" );
          break;
        case 302:
          rewrite_addlog( field, name, "Page moved - temporary" );
          break;
        case 304:
          rewrite_addlog( field, name, "Page unchanged" );
          break;
        default:
        {
          char buf[80];
          sprintf( buf, "Unknown http reply %d", code );
          rewrite_addlog( field, name, buf );
          break;
        }
      }
      continue;
    }

    /* move past the reply code to any additional text after it */
    rest++;
    while ( isdigit( *rest ) ) rest++;
    while ( isspace( *rest ) ) rest++;

    /* Is it html? */
    if ( _swix(OS_File, _INR(0,1) | _OUT(2), 17, name, &filetype) )
    {
      debug_printf(("Failed to read filetype for '%s'\n",name));
      rewrite_addlog( field, name, *rest ? rest : NULL );
      continue;
    }
    filetype = (filetype & 0x000fff00) >> 8;
    if ( filetype != FILETYPE_HTML )
    {
      rewrite_addlog( field, name, *rest ? rest : NULL );
      continue;
    }

    /* found one: hand the url back to the caller */
    strncat( url, field, urlsize-1 );
    break;
  }

  if ( eof ) return -1; /* eof */
  return *url ? 0 : 1;
}

/* Prepares a fetched page for rewriting.
 *
 * The page's datestamp is read into 'date', the page is renamed to the
 * temporary name, and then a new empty file is opened under the original name
 * (*output) along with the renamed original (*input).
 *
 * Returns 0 with both streams open. On failure returns -1, tries to put the
 * original file back, and leaves both *input and *output NULL so the caller
 * never sees a pointer to an already-closed stream.
 */
static int rewrite_openfiles( char *name, FILE **input, FILE **output, char *date )
{
  *input = NULL;
  *output = NULL;

  file_readstamp( name, date );

  /* clear away any temporary file left over from an earlier run */
  remove( rewrite_tmpname );

  if ( rename(name, rewrite_tmpname) != 0 )
  {
    debug_printf(("Renaming to temporary file failed\n"));
    return -1;
  }
  if ( (*output = fopen(name,"wb")) == NULL )
  {
    rename(rewrite_tmpname, name);
    debug_printf(("Opening output file failed\n"));
    return -1;
  }
  if ( (*input = fopen(rewrite_tmpname,"rb")) == NULL )
  {
    fclose(*output);
    *output = NULL; /* don't leave a dangling stream for the caller to close again */
    remove(name);
    rename(rewrite_tmpname, name);
    debug_printf(("Opening input file failed\n"));
    return -1;
  }
  return 0;
}

/* Performs one slice of the rewrite, to be called repeatedly.
 *
 * The work is a state machine: open the fetch log and html log (IDLE), find
 * the next html page in the fetch log (READLOG), open it (OPENFILES), copy it
 * a chunk at a time with its links rewritten (READPAGE), then copy the fetch
 * error log into the html log (ADDERRORS) and finally close the html log and
 * delete the fetch logs (REWRITEDONE).
 *
 *  abort - non-zero to abandon the rewrite: all files are closed, a page that
 *          was part way through being rewritten is put back as it was, and the
 *          state machine returns to idle.
 *
 * Returns 1 while there is more to do, 0 when finished (or aborted), and -1
 * if there was no fetch log to process or the html log could not be opened.
 */
int rewrite_poll( int abort )
{
  static FILE *f = NULL, *output = NULL, *input = NULL, *fetch=NULL;
  static char name[FILEPATH_MAXLEN];
  static char url[URL_MAXLEN];
  static char basehref[URL_MAXLEN];
  static char date[5];
  static enum { IDLE, ADDFETCH, READLOG, OPENFILES, READPAGE, ADDERRORS, REWRITEDONE } state = IDLE;

  int ret;

  if ( abort )
  {
    /* between polls, READPAGE means 'name' is half written and the original
     * page is sitting under the temporary name */
    int midpage = ( state == READPAGE && input && output );

    if ( f ) fclose( f );
    if ( input ) fclose( input );
    if ( output ) fclose( output );
    if ( fetch ) fclose( fetch );
    if ( rewrite_logf ) fclose( rewrite_logf );
    f = input = output = fetch = rewrite_logf = NULL;

    if ( midpage )
    {
      /* throw away the partial output and restore the original */
      debug_printf(("Rewrite aborted, replacing original\n"));
      remove(name);
      if (rename(rewrite_tmpname, name) != 0)
        debug_printf(("Could not replace original file\n"));
    }

    state = IDLE;
    rewrite_checkfortags( NULL, NULL, NULL, basehref, sizeof basehref ); /* clear static data */
    return 0;
  }

  switch (state)
  {
    case IDLE:
      f = fopen(LOG_DIR".FetchLog","r");
      if (!f)
      {
        debug_printf(("Log file %s already open or doesn't exist in rewrite\n",LOG_DIR".FetchLog"));
        return -1; /* nothing to do */
      }
      if ( rewrite_openlog() < 0 )
      {
        debug_printf(("error!\n"));
        fclose( f );
        f = NULL;
        return -1;
      }
      state=ADDFETCH;
      break;
      /* could fall through */

    case ADDFETCH:
      state=READLOG;
      /* fall through */

    case READLOG:
      ret = rewrite_readlog(f, name, sizeof(name), url, sizeof(url));
      if ( ret < 0 )
      {
        /* end of the fetch log: move on to the error log */
        state=ADDERRORS;
        fclose( f );
        f = NULL;
        break;
      }
      else if (ret>0)
        break; /* still looking for a line */

      state=OPENFILES;
      break;
      /* could fall through */

    case OPENFILES:
      if ( rewrite_openfiles(name, &input, &output, date ) < 0 )
      {
        /* neither stream is open now; make sure we never try to close one */
        input = output = NULL;
        rewrite_addlog( url, name, NULL );
        state=READLOG;
        break;
      }
      state=READPAGE;
      *basehref = 0;
      /* fall through */

    case READPAGE:
      ret = rewrite_checkfortags( url, input, output, basehref, sizeof basehref );
      if (ret>0) break; /* still working */
      fclose(input);
      /* closing flushes the last of the output; if that fails the rewritten
       * page is incomplete, so treat it like any other write failure */
      if ( fclose(output) != 0 && ret == 0 ) ret = -1;
      output = input = NULL;
      if (ret<0)
      {
        /* failure somewhere. remove output, rename tmp back to what it was */
        debug_printf(("Rewrite failed, replacing original\n"));
        remove(name);
        if (rename(rewrite_tmpname, name) != 0)
          debug_printf(("Could not replace original file\n"));
      }
      if (ret==0)
      {
        /* success */
        /* Set type of output file to html */
        _swix(OS_File, _INR(0,2), 18, name, FILETYPE_HTML);
        file_setstamp( name, date );
        rewrite_addlog( url, name, NULL );
        if (remove(rewrite_tmpname) != 0)
          debug_printf(("Removing temporary file failed.\n"));
      }
      state = READLOG; /* start on another file */
      break;

    case ADDERRORS:
      if ( ! fetch )
      {
        /* first visit: open the error log and write the section heading */
        fetch = fopen( LOG_DIR".FetchError", "r" );
        {
          if ( !rewrite_logf )
          {
            debug_printf((" Could not open log file for append\n"));
          }
          else
          {
            /* the error log may not exist at all, so only probe it if the
             * open succeeded; an empty log counts as 'no errors' */
            if ( fetch )
            {
              fseek( fetch, 0, SEEK_END );
              if ( ftell( fetch ) == 0 )
              {
                fclose( fetch );
                remove( LOG_DIR".FetchError" );
                fetch = NULL;
              }
              else
                fseek( fetch, 0, SEEK_SET );
            }
            if ( fetch )
              fprintf( rewrite_logf, REWRITE_END "<p>\nThe following errors occurred during the fetch:\n<table>\n");
            else
              fprintf( rewrite_logf, REWRITE_END "<p>\nNo errors occurred during the fetch\n</p>\n");
          }
        }
        if ( ! fetch )
        {
          state = REWRITEDONE;
          break;
        }
      }
      if ( fetch )
      {
        /* read from error log, at most 25 lines per call */
        int lines=25, cnt;
        char *ptr, *tmp = NULL, line[1024];

        while ( lines-- && ( tmp = fgets(line, sizeof line, fetch) ) != NULL )
        {
          size_t len;

          /* the url follows the fourth space, the error text follows the url */
          for (cnt=0, tmp=line; cnt<4 && *tmp; cnt+=(*tmp++)==' ');
          if ( cnt<4 || (ptr=strchr(tmp,' '))==0 ) continue; /* not 5 spaces, can't be for us */
          *ptr++=0;
          len = strlen(ptr);
          if ( len > 0 && ptr[len-1] == '\n' ) ptr[len-1]=0;
          debug_printf(("url = %s, error = %s\n",tmp,ptr));
          {
            if ( !rewrite_logf )
            {
              debug_printf((" Could not open log file for append\n"));
            }
            else
            {
              /* add line to html log */
              fprintf( rewrite_logf, "<tr><td>%s<td><a href=\"%s\">%s</a>\n", ptr, tmp, tmp );
            }
          }

        }
        /* tmp is only NULL here if fgets hit the end of the error log */
        if ( tmp == NULL )
        {
          debug_printf(("tmp is NULL, going to DONE\n"));
          state = REWRITEDONE;
          fclose( fetch );
          fetch = NULL;
          {
            if ( !rewrite_logf )
            {
              debug_printf((" Could not open log file for append\n"));
            }
            else
            {
              fprintf( rewrite_logf, "</table>\n</p>\n");
            }
          }
          break;
        }
      }
      break;

    case REWRITEDONE:
      rewrite_closelog();
      if ( remove( LOG_DIR".FetchLog" ) != 0 )
        debug_printf(( "Could not remove FetchLog\n" ));

      if ( remove( LOG_DIR".FetchError" ) != 0 )
        debug_printf(( "Could not remove FetchError\n" ));
      state = IDLE;
      return 0;
  }

  if ( state == IDLE )  return 0; /* finished */

  return 1; /* working */
}
