/* fetchfile.c
 *
 * WebGet: Acorn Web Fetcher and rewriter
 * Functions for opening and reading from fetch files.
 *
 *  Joseph Heenan, 1996-8
 * All rights reserved.
 *
 * $Log: fetchfile,v $
 * Revision 1.9  1998/10/05 19:46:53  joseph
 * Increased size of exclude/include lists in both config file and
 *   fetchfile.
 * Added some warnings when the size limit of items is reached
 *
 * Revision 1.8  1998/09/26 20:26:17  joseph
 * Fixed bug where images wouldn't be fetched
 *
 * Revision 1.7  1998/09/15 17:58:47  joseph
 * Added 'Launch URLs' and 'Open root' to icon bar menu
 *
 * Revision 1.6  1998/08/30 18:31:48  joseph
 * Fetching of inline images may be specified more precisely
 *
 * Revision 1.5  1998/08/30 13:09:58  joseph
 * Fetching ordered is now more defined - pages closer to the root, and
 * actual pages (as opposed to inlines) at the same line level are favoured.
 *
 * Revision 1.4  1998/08/12 21:41:10  joseph
 * Moved searchfordesc from addurl to fetchfile and made global
 *
 * Revision 1.3  1998/08/10 21:18:45  joseph
 * Fixed bug with links=dir when site is of form http://hostname
 *
 * Revision 1.2  1998/08/09 15:06:59  joseph
 * Added include/exclude lists for URLs
 *
 * Revision 1.1  1998/07/24 17:11:04  joseph
 * Added UI for adding URLs to a fetchfile
 *
 *
 */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "swis.h"

#include "config.h"
#include "my_string.h"
#include "defines.h"
#include "macros.h"
#include "url.h"
#include "debug.h"
#include "ruleset.h"
#include "webget.h"
#include "misc.h"
#include "wimpclib.h"

#include "fetchfile.h"

/* Fixed text that begins every fetch file.  fetchfile_writehead() emits it
 * followed by the version number and a newline; fetchfile_open() requires a
 * byte-for-byte match before it will accept a file. */
#define fetchfile_header "# WebGet fetch file\n# Version "
/* Fetch file format version: written after the header by
 * fetchfile_writehead() and checked (via atoi) by fetchfile_open(). */
#define fetchfile_version 1


/* Kinds of "tag=value" line that fetchfile_load() distinguishes.  Tag names
 * are mapped onto these values by the fetch_filetags table. */
typedef enum
{
  fetch_linkdepth,     /* value is parsed with atoi() as the link depth */
  fetch_url,           /* value is a URL to add, launch or open */
  fetch_links,         /* link-following policy: 'a'll, 'h'ost, anything else = dir */
  fetch_inlineimages,  /* inline image policy: 'y'/'a'll, 'h'ost, 'n'one, else dir */
  fetch_inlineframes,  /* 'y' enables inline frames, anything else disables them */
  fetch_includefiles,  /* replaces the include list handed to ruleset_add() */
  fetch_excludefiles,  /* replaces the exclude list handed to ruleset_add() */
  fetch_ignore         /* recognised tag that fetchfile_load() takes no action on */
}
fetchfile_type;


/* One entry of the tag lookup table: the tag text as it appears before the
 * '=' in a fetch file, and the fetchfile_type it selects. */
typedef struct
{
  const char       *tag;   /* tag name; NULL marks the end of the table */
  fetchfile_type   type;   /* action selector used by fetchfile_load() */
}
fetch_file_tags;



/* Table of the tags fetchfile_load() understands.  It is searched linearly
 * with my_strcasecmp() and the scan stops at the entry whose tag is NULL, so
 * that entry must stay last. */
static fetch_file_tags fetch_filetags[] = {
  { "linkdepth",              fetch_linkdepth    },
  { "links",                  fetch_links        },
  { "url",                    fetch_url          },
  { "inline-images",          fetch_inlineimages },
  { "frames",                 fetch_inlineframes },
  /* "name" is listed so that fetchfile_load() does not report it as an
   * unknown tag; its value is read separately through fetchfile_readtag()
   * by fetchfile_searchfordesc(). */
  { "name",                   fetch_ignore       },
  { "include",                fetch_includefiles },
  { "exclude",                fetch_excludefiles },
  { NULL,                     fetch_ignore       }, /* end-of-table sentinel */
};


/* Write the fetch file signature (header text, format version, newline) to
 * the stream f.
 *
 * Returns 0 when fprintf() succeeded, -1 when it reported an error.
 */
int fetchfile_writehead( FILE *f )
{
  int written = fprintf( f, "%s%d\n", fetchfile_header, fetchfile_version );

  return ( written < 0 ) ? -1 : 0;
}


/* Open the fetch file called name for reading and validate its signature.
 *
 * The file must start with the exact fetchfile_header text, followed on the
 * same line by a number equal to fetchfile_version.  On success the returned
 * stream is positioned at the start of the line after the version.
 *
 * Returns the open stream, or NULL on any failure.  Every failure is
 * reported through E_REPORT before returning, and the stream is closed if it
 * had been opened.
 */
FILE *fetchfile_open( const char *name )
{
  const size_t  headlen = sizeof(fetchfile_header) - 1; /* header text without its NUL */
  char          buf[1024];
  FILE         *in;

  in = fopen( name, "r" );
  if ( in == NULL )
  {
    E_REPORT( "Could not open fetch file!" );
    return NULL;
  }

  /* The checks run in order; the first one to fail reports and drops
   * through to the common close-and-fail exit below. */
  if ( fread( buf, 1, headlen, in ) != headlen )
  {
    E_REPORT( "Error reading fetch file header" );
  }
  else if ( memcmp( buf, fetchfile_header, headlen ) != 0 )
  {
    E_REPORT( "Invalid file header found reading fetch file" );
  }
  else if ( fgets( buf, sizeof buf, in ) == NULL )
  {
    /* the rest of the header line holds the version number */
    E_REPORT( "Error reading fetch file version" );
  }
  else if ( atoi( buf ) != fetchfile_version )
  {
    E_REPORT( "Fetch file version wrong\n" );
  }
  else
  {
    return in;
  }

  fclose( in );
  return NULL;
}


/* Read the next "tag=value" line from the fetch file f.
 *
 * f     stream returned by fetchfile_open(); NULL is accepted and yields NULL.
 * line  caller's buffer, at least size bytes long.
 * size  capacity of line in bytes (must be at least 2).
 *
 * Lines starting with '#' and blank lines are skipped.  A line that does not
 * fit in the buffer is discarded as a whole and reported once its end is
 * reached.  A line with no '=' is reported and skipped.
 *
 * On success the tag is left NUL-terminated at the start of line (the '=' is
 * overwritten) and the return value points, inside line, at the first
 * non-whitespace character after the '='.  Whitespace before the '=' is not
 * trimmed from the tag.
 *
 * Returns NULL when no further usable line exists (end of file or read error).
 */
char *fetchfile_readline( FILE *f, char *line, int size )
{
  int overlong = 0;

  /* a buffer of fewer than 2 bytes cannot hold even one character + NUL */
  if ( !f || size < 2 ) return NULL;

  while ( fgets( line, size, f ) )
  {
    size_t len = strlen( line );
    int terminated;
    char *ptr;

    /* fgets() can hand back an empty string if the file contains a NUL byte;
     * skip it rather than index line[-1] */
    if ( len == 0 ) continue;

    terminated = ( line[len - 1] == '\n' );

    if ( !terminated && ( overlong || !feof( f ) ) )
    {
      /* buffer filled before the newline arrived: keep discarding chunks
       * until the end of this physical line */
      overlong = 1;
      continue;
    }
    /* an unterminated chunk that reaches here is the last line of a file
     * lacking a trailing newline - it is a complete line, so process it
     * (a last line that exactly fills the buffer is still dropped, because
     * fgets() returns it before end of file is detected) */

    if ( overlong )
    {
      /* final chunk of a line that was too long for the buffer */
      overlong = 0;
      E_REPORT("Overlong line found reading fetchfile - ignored");
      continue;
    }
    if ( *line == '#' || *line == '\n' ) continue; /* skip comments and blank lines */
    if ( terminated ) line[len - 1] = 0;

    ptr = strchr( line, '=' );
    if ( !ptr )
    {
      char error[100];
      strcpy( error, "Error reading fetch file, no '=' in \"" );
      /* strncat's count is the room left in error, not in line: leave space
       * for the closing quote and the terminating NUL */
      strncat( error, line, sizeof(error) - strlen(error) - 1 - 1 );
      strcat( error, "\"" );
      E_REPORT( error );
      continue;
    }
    *ptr = 0; /* terminate the tag */
    /* <ctype.h> functions need an unsigned char value */
    while ( isspace( (unsigned char) *++ptr ) );
    return ptr;
  }

  return NULL;
}


/* Look up the value of the first line in fetch file fname whose tag matches
 * tag (compared with my_strcasecmp).
 *
 * On a match the value is copied into buffer, truncated to bufsize - 1
 * characters and NUL-terminated, and 0 is returned.
 *
 * Returns -1 if the file cannot be opened (fetchfile_open() has already
 * reported why) or if no line carries the tag; buffer is not written to in
 * either case.
 */
int fetchfile_readtag( const char *fname, char *tag, char *buffer, int bufsize )
{
  char  text[1024];
  char *value;
  int   result = -1;
  FILE *in = fetchfile_open( fname );

  if ( in == NULL )
    return -1;

  while ( ( value = fetchfile_readline( in, text, sizeof text ) ) != NULL )
  {
    /* fetchfile_readline() leaves the tag at the start of text */
    if ( my_strcasecmp( tag, text ) == 0 )
    {
      buffer[0] = '\0';
      strncat( buffer, value, bufsize - 1 );
      result = 0;
      break;
    }
  }

  fclose( in );
  return result;
}

/* Offset in url of the character after the third '/' (the end of
 * "scheme://host/").  If the scan runs onto the end of the string instead -
 * fewer than three slashes, or the third slash is the last character - the
 * offset of the last character is returned; for an empty string that is -1.
 */
static int fetchfile_hostend( const char *url )
{
  int pos = 0;
  int slashes = 0;

  while ( slashes < 3 && url[pos] )
    if ( url[pos++] == '/' ) slashes++;

  if ( !url[pos] )
    pos--; /* ran onto the terminator - step back to the last char */

  return pos;
}

/* Work out the prefix length of url that corresponds to the policy links.
 *
 * url    URL the policy is applied to.
 * links  links_all, links_host, links_none or (anything else) 'dir'.
 * none   optional; when non-NULL it is set to 1 if links is links_none and
 *        to 0 otherwise.
 *
 * Returns
 *   links_all   0
 *   links_none  0 when none is supplied; with none == NULL the value is
 *               treated like 'dir'
 *   links_host  offset just past the third '/' (see fetchfile_hostend)
 *   'dir'       offset of the last '/' in the URL, but never less than the
 *               links_host offset
 *
 * Offsets are tracked as int indices rather than pointers so that nothing is
 * stepped before the start of url and the debug "%d" receives an int.
 */
static int fetchfile_calcfetchp( const char *url, conf_followlinks links, int *none )
{
  int limit, pos;

  if ( none )
    *none = 0;

  if ( links == links_all )
    return 0;

  if ( links == links_none && none )
  {
    *none = 1;
    return 0;
  }

  limit = fetchfile_hostend( url );

  if ( links == links_host )
  {
    pos = limit;
  }
  else
  {
    /* otherwise assume 'dir': walk back from the terminator to the last
     * '/', without going below the host part */
    pos = (int) strlen( url );
    while ( pos > limit && url[pos] != '/' ) pos--;
  }

  debug_printf(("ptr-url = %d, url = %s\n", pos, url));

  return pos;
}

/* Queue one URL from a fetch file on the PENDING list.
 *
 * url           URL text.
 * links         link-following policy; converted to a prefix length with
 *               fetchfile_calcfetchp().
 * depth         link depth, passed straight to url_addto().
 * inlineimages  inline image policy; converted the same way as links, and
 *               links_none additionally sets the "no inline images" flag.
 * inlineframes  inline frame flag, passed straight to url_addto().
 * includefiles,
 * excludefiles  lists handed to ruleset_add().
 *
 * Returns 0 when the URL was queued, and also when it was skipped because
 * url_handled() rejected it (reported here) or ruleset_add() returned -1.
 * Returns -1 only when url_addto() fails, after reporting the error.
 *
 * The prefix length for links used to be recomputed here by a switch that
 * repeated fetchfile_calcfetchp() line for line; the helper's result is now
 * used directly.  The only visible difference is that the debug trace is
 * printed once instead of twice.
 */
static int fetchfile_addurl( char *url, conf_followlinks links, int depth, conf_followlinks inlineimages,
                             int inlineframes, const char *includefiles, const char *excludefiles)
{
  int x = url_handled( url );
  int ruleset;
  int fetchp, imagefetchp;
  int noinlineimages;

  if ( x < 0 )
  {
    E_REPORT("Only http and ftp urls are supported");
    return 0; /* Only http / ftp */
  }
  else if ( x > 0 )
  {
    E_REPORT( "A ftp proxy must be set for ftp urls to be fetched" );
    return 0;
  }

  ruleset = ruleset_add( includefiles, excludefiles );
  if ( ruleset == -1 )
    return 0;

  fetchp = fetchfile_calcfetchp( url, links, NULL );
  /* a non-NULL third argument means noinlineimages is always assigned */
  imagefetchp = fetchfile_calcfetchp( url, inlineimages, &noinlineimages );

  if ( url_addto( PENDING, url, depth, fetchp, noinlineimages, inlineframes, ruleset, inline_not, 0, imagefetchp ) < 0 )
  {
    char error[256];
    strcpy( error, "Error adding url " );
    strncat( error, url, sizeof(error) - strlen( error ) - 1 );
    E_REPORT( error );
    return -1;
  }

  return 0;
}

/* Size of the line buffer and of the include/exclude list buffers used while
 * loading a fetch file. */
#define FETCHFILE_MAXLEN 2048

/* Read the fetch file fetch_file and carry out action on every URL in it.
 *
 * Settings lines (linkdepth, links, inline-images, frames, include, exclude)
 * start from the config_* defaults and take effect for the url lines that
 * follow them.  If no include list is in force when a url line is met, the
 * include list becomes "*".
 *
 * action selects what is done with each URL:
 *   FETCHFILE_LOAD      queue it with fetchfile_addurl()
 *   FETCHFILE_OPENURL   pass it to url_launch()
 *   FETCHFILE_OPENROOT  convert it to a filename with misc_urltofilename(),
 *                       canonicalise that with OS_FSControl 37 and start
 *                       "Filer_Run <path>" as a Wimp task
 *
 * Unknown tags are reported and skipped.
 *
 * Returns 0 on success.  Returns -1 if the file could not be opened (already
 * reported by fetchfile_open()), if fetchfile_addurl() failed, or if the
 * file contained no url lines (reported here).
 */
int fetchfile_load( const char *fetch_file, fetchfile_action_type action )
{
  conf_followlinks links = config_links;
  int depth = config_linkdepth;
  conf_followlinks inlineimages = config_inlineimages;
  int inlineframes = config_inlineframes;
  char includefiles[ FETCHFILE_MAXLEN ];
  char excludefiles[ FETCHFILE_MAXLEN ];
  FILE *f = fetchfile_open( fetch_file );
  char line[ FETCHFILE_MAXLEN ];
  char *val;
  int urls = 0;

  *includefiles = 0;
  *excludefiles = 0;

  if ( !f ) return -1; /* error already reported by fetchfile_open() */

  /* after each successful read, line holds the tag and val its value */
  while ( val = fetchfile_readline( f, line, sizeof line ), val )
  {
    fetch_file_tags *tag = fetch_filetags;

    while ( tag->tag && my_strcasecmp( tag->tag, line ) != 0 ) tag++;

    if ( !tag->tag )
    {
      char error[100];
      strcpy( error, "Error reading fetch file, tag unknown (\"" );
      /* strncat's count is the room left in error, not in line: leave space
       * for the closing "\")" and the terminating NUL */
      strncat( error, line, sizeof(error) - strlen(error) - 1 - 2 );
      strcat( error, "\")" );
      E_REPORT( error );
      continue;
    }

    switch ( tag->type )
    {
      case fetch_url:
        if ( ! *includefiles )
          strcpy( includefiles, "*" );

        urls++;
        switch ( action )
        {
          case FETCHFILE_LOAD:
            debug_printf(("Got url '%s'\n",val));
            if ( fetchfile_addurl( val, links, depth, inlineimages, inlineframes, includefiles, excludefiles ) < 0 )
            {
              fclose( f );
              return -1;
            }
            break;

          case FETCHFILE_OPENURL:
            url_launch( val );
            break;

          case FETCHFILE_OPENROOT:
          {
            const char cmd_start[] = "Filer_Run ";
            _kernel_oserror *e;
            char name[FILEPATH_MAXLEN], cmd[FILEPATH_MAXLEN + sizeof(cmd_start)];

            if ( misc_urltofilename( val, name, sizeof name ) < 0 )
              break;
            strcpy( cmd, cmd_start );
            /* the canonicalised path is written directly after "Filer_Run " */
            e = _swix(OS_FSControl, _INR(0,5), 37, name, cmd+strlen(cmd), 0, 0, FILEPATH_MAXLEN );
            if ( e )
            {
              E_CHECK( e );
            }
            else
            {
              debug_printf(("Name = '%s'\n",cmd));
              E_CHECK( _swix( Wimp_StartTask, _IN(0), cmd ) );
            }
          }
            break;
        }
        break;

      case fetch_linkdepth:
        /* <ctype.h> functions need an unsigned char value */
        if ( !isdigit( (unsigned char) *val ) )
        {
          debug_printf(( "%s is not an integer\n", val ));
          continue; /* keep the previous depth */
        }
        depth = atoi( val );
        break;

      case fetch_links:
        switch ( tolower( (unsigned char) *val ) )
        {
          case 'a': links = links_all;  break; /* all */
          case 'h': links = links_host; break; /* host */
          default:  links = links_dir;         /* dir */
        }
        break;

      case fetch_inlineimages:
        switch ( tolower( (unsigned char) *val ) )
        {
          case 'y':
          case 'a':
            inlineimages = links_all;
            break;
          case 'h':
            inlineimages = links_host;
            break;
          case 'n':
            inlineimages = links_none;
            break;
          default:
            inlineimages = links_dir;
        }
        break;

      case fetch_inlineframes:
        switch ( tolower( (unsigned char) *val ) )
        {
          case 'y':
            inlineframes = 1; break;
          default:  inlineframes = 0;
        }
        break;

      case fetch_includefiles:
        /* empty-then-strncat gives a truncating, always-terminated copy */
        *includefiles = 0;
        strncat( includefiles, val, sizeof(includefiles) - 1 );
        break;

      case fetch_excludefiles:
        *excludefiles = 0;
        strncat( excludefiles, val, sizeof(excludefiles) - 1 );
        break;

      default: /* fetch_ignore: recognised tag, nothing to do here */
        break;
    }
  }

  fclose(f);

  if ( urls == 0 )
  {
    E_REPORT( "No urls in fetch file!" );
    return -1;
  }

  return 0;
}



/* Search the files in WEBGET_FETCHDIR for a fetch file whose "name" tag is
 * exactly desc (case-sensitive strcmp).
 *
 * desc     description to look for.
 * buffer   receives WEBGET_FETCHDIR "." followed by the leaf name of each
 *          candidate file in turn.
 * bufsize  size of buffer; must hold the directory prefix plus at least 10
 *          more characters.
 *
 * -1 -> couldn't find matching description, the buffer was too small, or
 *       enumerating the directory failed. buffer contents are indeterminate.
 *  0 -> found matching desc, buffer contains filename
 */
int fetchfile_searchfordesc( const char *desc, char *buffer, int bufsize )
{
  int maxsize, numread, count = 0;
  char *fname;
  char descbuff[256];


  /* directory, plus a dot, plus a null, plus 10 chars of fname; the sign is
   * tested first because comparing a negative int with sizeof would convert
   * it to a huge unsigned value */
  if ( bufsize < 0 || (size_t) bufsize < sizeof(WEBGET_FETCHDIR".") + 10 )
    return -1; /* buffer less than that size is naff all use, really. */

  strcpy( buffer, WEBGET_FETCHDIR "." );
  maxsize = bufsize - (int) ( strlen(buffer) + 1 );
  fname = buffer + strlen( buffer ); /* leaf names are written here */

  do
  {
    /* Read one directory entry per call.  count is passed in as the
     * position to continue from and is updated on return; the loop ends
     * when it comes back as -1.
     *
     * An enumeration error must yield -1: the previous NULL (i.e. 0) is this
     * function's "found" value.  NOTE(review): assumes E_CHECK_RETURN
     * returns its first argument when the call reports an error - confirm
     * against its definition. */
    E_CHECK_RETURN( -1, _swix( OS_GBPB, _INR(0,6) | _OUTR(3,4), 9,
                    WEBGET_FETCHDIR, fname, 1, count, maxsize, "*",
                    &numread, &count ) );

    if ( numread != 1 ) continue; /* nothing read this time round */
    debug_printf(("Trying file '%s'\n", fname));

    if ( fetchfile_readtag( buffer, "name", descbuff, sizeof descbuff ) < 0 )
      continue; /* well, if there's no 'name' tag, it can't match the desc we're looking for */

    debug_printf(("Comparing '%s' to '%s'\n", desc, descbuff ));

    if ( strcmp( desc, descbuff ) == 0 )
      return 0; /* found! */

  } while ( count != -1 ); /* End of while loop */

  return -1; /* can't have found it then... */
}
