/* pagedb.c
 *
 * WebGet: Acorn Web Fetcher and rewriter
 * Page database handler
 *
 * (c) Joseph Heenan, 1996-8
 * All rights reserved.
 *
 * $Log: pagedb,v $
 * Revision 1.1  1998/10/03 15:26:05  joseph
 * Initial CVS version
 *
 *
 */

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <stdlib.h>
#include "swis.h"

#include "defines.h"
#include "macros.h"
#define DEBUG
#include "debug.h"
#include "file.h"

#include "pagedb.h"

/* operations needed:
 *
 * add entry - first time page is fetched, we know last mod, last fetch
 * update entry - when refetching page. same as add entry?
 * get entry from filename - needed when we're freshening a site, to
 *   transform links back to original state
 * get entry from url - for rewrite and sending if-mod-since
 * delete unused entries - removes pages not used in the last fetch
 * update db last fetch time
 *
 * to decide what pages haven't been used in a fetch, we see which haven't
 * been fetched since this fetch began
 * this means we update the 'last fetched' time even when we find a page is
 * unchanged
 * page states:
 *   updated in last fetch - page fetch time > db last fetch time
 *   checked in last fetch, but not modified - page last check time > db last
 *     fetch time
 *   not used in last fetch - page last check time < db last fetch time
 */

/* Commented-out limits; nothing in this file refers to them. */
// static int pagedb_maxfilenamelen = 10;
// static int pagedb_maxfileperdir = 77;
/* Text every page map file starts with. pagedb_open() writes a decimal
 * version number straight after it, and misc_openfile() checks for it when
 * an existing map is opened. */
static const char pagedb_fheader[] = "# WebGet fetched pages db\n# Version ";
/* Name of the page map file; appended directly to the base directory name. */
#define PAGEMAP_NAME ".Pages/txt"

/* Opens a file and checks that it starts with the expected header text.
 *
 * name    - name of the file to open
 * ver     - receives the number parsed (with atoi) from the text that
 *           follows the header on the same line
 * fheader - exact text the file has to begin with
 * mode    - fopen() mode string
 *
 * Returns the open stream, positioned after the line holding the version
 * number, or NULL on failure. Every failure is reported through E_REPORT
 * and the stream is closed before returning NULL.
 */
static FILE *misc_openfile( const char *name, int *ver, const char *fheader, const char *mode )
{
  char line[1024];
  const size_t len = strlen( fheader );
  FILE *f;

  /* the header is read into 'line' below, so it has to fit */
  if ( len > sizeof line )
  {
    E_REPORT( "File header too long" );
    return NULL;
  }

  f = fopen( name, mode );
  if ( !f )
  {
    E_REPORT( "Could not open file!" );
    return NULL;
  }

  if ( fread( line, 1, len, f ) != len )
  {
    E_REPORT( "Error reading file header" );
    fclose(f);
    return NULL;
  }

  if ( memcmp( line, fheader, len ) != 0 )
  {
    E_REPORT( "Invalid file header" );
    fclose(f);
    return NULL;
  }

  /* the remainder of the header line is the version number */
  if ( ! fgets( line, sizeof line, f ) )
  {
    E_REPORT( "Error reading file version" );
    fclose(f);
    return NULL;
  }

  *ver = atoi( line );

  return f;
}

/* Tells pagedb_open() what to do when the page map file does not exist. */
typedef enum
{
  PAGEDB_NOCREATE,  /* treat a missing page map as a failure */
  PAGEDB_CREATE     /* create a missing page map */
} pagedb_create_type;

/* Opens the page map file, whose name is basedir with PAGEMAP_NAME appended.
 *
 * basedir - base directory name
 * pagesf  - receives the open stream on success; set to NULL on failure
 * create  - PAGEDB_CREATE to create the map when it does not exist
 * t       - if non-NULL, receives the first hex value stored after the
 *           header (written as time(NULL) when the map is created)
 * val     - if non-NULL, receives the second hex value stored after the
 *           header (written as 0 when the map is created)
 *
 * returns:
 *  -1 for failure (eg. error, or create=NOCREATE and map doesn't exist)
 *   0 + *pagesf pointing at end of header if successful (opened for update
 *       if create=CREATE, for reading otherwise)
 *   1 + *pagesf open for writing, header already written, if the file didn't
 *       exist and create=CREATE (*t and *val are not written in this case)
 */
static int pagedb_open( const char *basedir, FILE **pagesf, pagedb_create_type create, time_t *t, unsigned int *val )
{
  char fname[ FILENAME_MAXLEN ];
  char msg[ FILENAME_MAXLEN + 32 ];  /* error text: fixed prefix + basedir */
  size_t len = strlen( basedir );
  int  ver;
  char s1[10], s2[10];
  unsigned int hex;
  FILE *f;

  *pagesf = NULL;

  if ( len + sizeof PAGEMAP_NAME > sizeof fname )
  {
    E_REPORT("Page map filename too long");
    return -1;
  }
  memcpy( fname, basedir, len );
  memcpy( fname + len, PAGEMAP_NAME, sizeof PAGEMAP_NAME );

  if ( !file_exists( fname ) )
  {
    if ( create == PAGEDB_NOCREATE )
      return -1; /* file doesn't exist */

    f = fopen( fname, "w" );
    if ( !f ||
         fprintf( f, "%s%d\n%.8x\n%.8x\n", pagedb_fheader, 1, (unsigned int) time(NULL), 0u ) < 0 )
    {
      if ( f )
        fclose( f );
      /* basedir passed the length check above, so it fits in msg */
      sprintf( msg, "Could not create page map %s", basedir );
      E_REPORT( msg );
      return -1;
    }
    *pagesf = f;

    return 1;
  }

  f = misc_openfile( fname, &ver, pagedb_fheader, create == PAGEDB_CREATE ? "r+" : "r" );
  if ( f == NULL )
  {
    sprintf( msg, "Could not open page map in %s", basedir );
    E_REPORT( msg );
    return -1;
  }

  if ( ver != 1 )
  {
    E_REPORT( "Page map file version wrong\n" );
    fclose(f);
    return -1;
  }

  if ( !fgets( s1, sizeof s1, f ) || !fgets( s2, sizeof s2, f ) )
  {
    E_REPORT( "Error reading information elements from page map\n" );
    fclose(f);
    return -1;
  }

  /* %x needs an unsigned int target, so go through 'hex' rather than
   * scanning straight into the caller's time_t */
  if ( t )
  {
    if ( sscanf( s1, "%8x", &hex ) != 1 )
    {
      E_REPORT( "Error reading information elements from page map\n" );
      fclose(f);
      return -1;
    }
    *t = (time_t) hex;
  }

  if ( val )
  {
    if ( sscanf( s2, "%8x", &hex ) != 1 )
    {
      E_REPORT( "Error reading information elements from page map\n" );
      fclose(f);
      return -1;
    }
    *val = hex;
  }

  *pagesf = f;

  return 0;
}

/* Reads one line from the db into buf and points page->fname and page->url
 * at the pieces of it (both end up pointing inside buf).
 *
 * Line layout, fields separated by single tabs:
 *   fname  lastmod  lastfetch  lastcheck  expan.val  url  newline
 * where the four middle fields are hex numbers of at most 8 digits.
 *
 * f       - stream to read from
 * page    - entry to fill in
 * buf     - storage for the line; overwritten and split in place
 * bufsize - size of buf
 *
 * returns:
 *   0 entry read
 *   1 end of file
 *  -1 read error, corrupt entry, or line doesn't end in a newline within
 *     bufsize
 *
 * NOTE(review): the %8x conversions need unsigned int targets; pagedb_type
 * is declared elsewhere - confirm the member types match.
 */
static int pagedb_readentry( FILE *f, pagedb_type *page, char *buf, size_t bufsize )
{
  char *ptr;
  size_t urllen;
  int consumed = 0;
  int ret;

  if ( !fgets( buf, bufsize, f ) )
  {
    /* 1 = eof, -1 = error */
    return feof( f ) ? 1 : -1;
  }

  page->fname = buf;

  ptr = strchr( buf, '\t' );
  if ( !ptr )
  {
    debug_printf(("Corrupt entry '%s' (no \\t)\n",buf));
    return -1;
  }

  *ptr++ = 0; /* terminate fname, move to the numeric fields */

  /* %n records how far the scan got, so we never step beyond what was
   * actually parsed (and hence never beyond the end of the string) */
  ret = sscanf( ptr, "%8x\t%8x\t%8x\t%8x%n",
          &page->lastmod, &page->lastfetch, &page->lastcheck, &page->expan.val,
          &consumed );
  if ( ret != 4 )
  {
    debug_printf(("Error scanning '%s' for numeric fields.(sscanf = %d)\n",ptr,ret));
    return -1;
  }

  ptr += consumed; /* move to trailing \t */
  if ( *ptr++ != '\t' )
  {
    debug_printf(("Corrupt fname entry '%s' (no \\t)\n",buf));
    return -1;
  }

  page->url = ptr;
  urllen = strlen( ptr );
  if ( urllen == 0 || ptr[ urllen - 1 ] != '\n' )
  {
    debug_printf(("Overlong line '%s'\n",buf));
    return -1;
  }
  ptr[ urllen - 1 ] = 0; /* strip the newline */

  return 0;
}

/* Steps a page filename of the form "DD.FF" (two digit directory number,
 * separator, two digit file number) on to the next name in the sequence.
 *
 * lastch - points at the last character of the name; the digits at
 *          lastch[-4], lastch[-3], lastch[-1] and lastch[0] are updated
 * pageno - running page number, updated in step with the name: the value
 *          modulo 100 follows the file number and each new directory rounds
 *          it up to the next multiple of 100
 *
 * File numbers 00 to 76 are used within a directory; the step after 76
 * resets the file number to 00 and moves on to the next directory.
 * There is no check for running past directory 99.
 */
static void pagedb_fnameinc( char *lastch, int *pageno )
{
  if ( (*pageno % 100) < 76 )
  {
    /* next file in the same directory */
    if ( *lastch < '9' )
    {
      (*lastch)++;
    }
    else
    {
      lastch[0] = '0';
      lastch[-1]++;
    }
    (*pageno)++;
  }
  else
  {
    /* directory is full - first file of the next directory */
    *pageno = (*pageno - *pageno % 100) + 100;
    lastch[0] = '0';
    lastch[-1] = '0';
    if ( lastch[-3] < '9' ) /* compare with the digit character, not the number 9 */
    {
      lastch[-3]++;
    }
    else
    {
      lastch[-3] = '0';
      lastch[-4]++;
    }
  }
}

/* Adds a page to the page map of page->basedir, creating the map if it
 * doesn't exist yet.
 *
 * A filename basedir + ".Pages." + "DD.FF" is built in page->fname, starting
 * at "00.00" and stepped on with pagedb_fnameinc() while a file of that name
 * exists. If the map already existed its entries are then scanned: an entry
 * whose filename equals the chosen one causes the next name to be generated
 * and the scan to restart. Finally a line holding the filename (without the
 * basedir prefix), the four numeric fields and the url is written at the
 * current position, which is the end of the file once the scan has hit eof.
 *
 * NOTE(review): when the url is already present the stored filename is
 * copied to the start of page->fname, but the entry is not updated, the scan
 * carries on and a new line is still appended - "update entry" was never
 * implemented.
 * NOTE(review): page->fname is filled with strcpy/strcat without checking
 * page->fnamesize - confirm callers always supply a big enough buffer.
 *
 * returns 0 on success, -1 on failure (the map is closed in either case)
 */
int pagedb_addentry( pagedb_type *page )
{
  FILE *f = NULL;
  int ret = pagedb_open( page->basedir, &f, PAGEDB_CREATE, NULL, NULL );
  long start;
  int pageno = 0;
  char *lastch;
  char *dbname;
  pagedb_type entry;
  char entrybuf[256];

  debug_printf(( "pagedb_open returned %d\n", ret ));

  /* never carry on without a stream to work on */
  if ( ret == -1 || f == NULL )
    return -1;

  /* remember where the entries start so the scan can be restarted */
  start = ftell( f );
  if ( start == -1L )
  {
    fclose( f );
    return -1;
  }

  /* make up new filename - we'll check the db for the fname already
   * in use the same time we see if the url is already there */
  strcpy( page->fname, page->basedir );
  strcat( page->fname, ".Pages." );
  dbname = page->fname + strlen( page->fname ); /* pointer to actual page filename within db */
  strcat( page->fname, "00.00" );
  lastch = page->fname + strlen( page->fname ) - 1;

  debug_printf(("Trying filename '%s'(%d)\n",page->fname,pageno));

  while ( file_exists( page->fname ) )
    pagedb_fnameinc( lastch, &pageno );

  debug_printf(("Using filename '%s'(%d)\n",page->fname,pageno));

  if ( ret == 0 )
  {
    /* search for url */
    while ( ret = pagedb_readentry( f, &entry, entrybuf, sizeof entrybuf ), ret == 0 )
    {
      debug_printf(("Comparing '%s' to '%s'\n", page->url, entry.url));
      if ( !strcmp( page->url, entry.url ) )
      {
        int len = strlen( entry.fname ) + 1;
        /* url present -> read filename */
        debug_printf(("Found url '%s'\n",page->url));
        if ( len > page->fnamesize )
        {
          debug_printf(("Uh-oh. Out of space for filename...\n"));
          fclose( f ); /* don't leak the page map on this path */
          return -1;
        }
        memcpy( page->fname, entry.fname, len );
        /* TODO: update entry */
      }

      if ( !strcmp( dbname, entry.fname ) )
      {
        /* filename present -> generate next filename in sequence, start from beginning again */
        debug_printf(("Filename clash for url '%s', fname '%s'\n", page->url, dbname ));
        pagedb_fnameinc( lastch, &pageno );
        if ( fseek( f, start, SEEK_SET ) != 0 )
        {
          fclose( f );
          debug_printf(("fseek() failed in pagedb_addentry\n"));
          return -1;
        }
      }
    } /* while readentry */

    if ( ret == -1 )
    {
      fclose( f );
      debug_printf(("pagedb_readentry returned -1\n"));
      return -1;
    }
  } /* if db exists */

  /* not present - append new entry */

  if ( fprintf( f, "%s\t" "%.8x\t" "%.8x\t" "%.8x\t" "%.8x\t" "%s\n",
                dbname,
                page->lastmod,
                page->lastfetch,
                page->lastcheck,
                page->expan.val,
                page->url ) < 0 )
  {
    fclose( f );
    debug_printf(("File write failed in pagedb_addentry\n"));
    return -1;
  }

  /* fclose flushes the new entry, so its result matters */
  if ( fclose( f ) != 0 )
  {
    debug_printf(("File close failed in pagedb_addentry\n"));
    return -1;
  }

  return 0;
}

/* Looks up the entry for page->url in the page map of page->basedir.
 *
 * NOTE: the lookup itself is not implemented yet. At present this only
 * checks that the page map can be opened.
 *
 * returns -1 if the page map could not be opened (it is never created
 * here), 0 otherwise
 */
int pagedb_getbyurl( pagedb_type *page )
{
  FILE *f = NULL;

  if ( pagedb_open( page->basedir, &f, PAGEDB_NOCREATE, NULL, NULL ) == -1 )
    return -1;

  /* TODO: search for url */

  /* TODO: present -> return entry */

  /* TODO: not present - return failure */

  /* opened read-only and nothing further to do - don't leak the stream */
  if ( f )
    (void) fclose( f );

  return 0;
}
