/*
 * $Log: fetch,v $
 * Revision 1.21  1999/04/05 14:16:49  joseph
 * Fixed bug when config_maxsess was set greater than MAXSESS
 * All tag values read from config file now passed through OS_GSTrans
 *
 * Revision 1.20  1998/09/02 19:21:22  joseph
 * Replaced internal mimemap stuff with calls to MimeMap module
 *
 * Revision 1.19  1998/08/30 18:31:48  joseph
 * Fetching of inline images may be specified more precisely
 *
 * Revision 1.18  1998/08/30 13:09:58  joseph
 * Fetching ordered is now more defined - pages closer to the root, and
 * actual pages (as opposed to inlines) at the same line level are favoured.
 *
 * Revision 1.17  1998/08/09 16:18:54  joseph
 * Fixed status window waiting count not getting updated at right time
 *
 * Revision 1.16  1998/08/09 15:17:23  joseph
 * Fixed bug where an open persistent connection would cause it to think
 * it was still fetching.
 *
 * Revision 1.15  1998/08/09 15:06:59  joseph
 * Added include/exclude lists for URLs
 *
 * Revision 1.14  1998/07/24 17:11:03  joseph
 * Added UI for adding URLs to a fetchfile
 *
 * Revision 1.13  1998/07/01 21:10:57  joseph
 * Added status window
 * Fixed persistent connections
 * Fixed magtags to only count processed url references
 * Fixed proxy_forunqualified, again.
 * Fixed leading/trailing spaces in urls...
 *
 * Revision 1.12  1998/06/16 18:29:38  joseph
 * Added recalculation of fetch portion for base href & redirections
 * Added processing of base href in fetch and rewrite
 * Moved file_truncate from rewrite to file
 * Logging of urls with unknown schemes
 * Errorlog file kept open across whole fetch
 * Log routines may handle effects of *close better.
 * Invalid chars in urls and hostnames replaced with ~'s on filesystem
 * Rewrite recovers from zero length log file
 * Removed permfail, softfail states as not used
 * Removed some unused bits from url structures
 * Changed proxy code to handle dotless hostnames okay.
 * Version to 0.06a.
 *
 * Revision 1.11  1998/06/14 21:24:37  joseph
 * Moved relative url resolver from rewrite / http -> misc and rewrote
 * Fixed rewrite leaving []'s all over log file.
 * Fixed misc_urltofilename barfing on url's like http://wiggle
 * Lessened processor time when fetching pages with _lots_ of links in them.
 * Make FetchError open/close at start/end of fetch like FetchLog
 * Fixed handling of ftp urls, with & without proxy set.
 * Fixed bugs in code to set fetchportion
 * URLs now moved to fetch list when fetched (whether successful or not)
 * Log file now says '[http]' or '[ftp]' for real link, as appropriate.
 * Recalculates fetch portion for redirected urls
 * Fixed handling of proxy for ftp (wasn't always using proxy)
 * url.c now uses 'times 2' allocator / deallocator.
 * url_handled corrected for ftp (returns true only if proxy set)
 * closes log file when fetch aborted.
 *
 * Revision 1.10  1998/06/06 21:44:55  joseph
 * Added ability to not fetch frames / inline images in fetch / config
 * wimpc_menuread moved to wimpclib
 * Now fades individual fetches when started, unfades all at end of fetch
 *
 * Revision 1.9  1998/06/05 22:55:17  joseph
 * URLs now moved to 'FETCHED' once they are fetched
 * Front end made proper
 * Added multiple fetch files + a menu in frontend
 * Fixed bugs with links containing url with spaces before them
 * Combined both makefilenames into one in misc.c
 * Added readtag and openfetch to http.
 * Keep FetchLog open for duration of fetch
 * Logfile now contains links to the original pages too
 * Added url_handled, returns true if the url is one we know how to fetch
 * Added shift to fetch without rewriting, and adjust to dump all flex
 * lists to files. (#define URLDUMP)
 * Frontend animation now updates every 0.5 secs, and has new rewrite icon
 * Added stopping / aborting of fetch / rewrite
 *
 * Revision 1.8  1998/05/28 22:26:30  joseph
 * Altered to compile with debugging changes
 *
 * Revision 1.7  1998/05/28 19:41:11  joseph
 * Changed all printf()'s to debug_printf(())'s
 *
 * Revision 1.6  1998/05/27 22:34:32  joseph
 * Tidied to a state for calling from the wimp app
 *
 * Revision 1.5  1998/05/07 21:02:00  jogu
 * Better state machine in main loop
 * Addition of ability to be server (#define BE_SERVER)
 *
 * Revision 1.4  1998/04/05 11:34:31  jogu
 * Added processing of new format fetch file
 *
 * Revision 1.3  1998/03/21 20:31:08  jogu
 * Tidied up, added key read to prevent stealing all processor time.
 * Added checking of maxsess.
 *
 * Revision 1.2  1997/12/29 17:15:25  jogu
 * Modified #include's to compile across NFS
 *
 * Revision 1.1.1.1  1997/12/29 14:37:55  jogu
 * WebGet Initial CMS Ver
 *
 */


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <ctype.h>

#include "flex.h"

#include "url.h"

#include "fetchstruc.h"

#include "fdset.h"
#include "http.h"
#include "rewrite.h"
#include "my_string.h"
#include "config.h"
#include "macros.h"
#ifdef BE_SERVER
#include "server.h"
#endif
#include "debug.h"
#include "defines.h"
#include "status.h"
#include "fetchfile.h"

#include "fetch.h"

#include "sys/time.h"

/*typedef struct rule_s
{

} rule;
typedef struct rules_s
{
  int uses; // how many loaded url's are using this rule

} rules;
*/


/* Table of fetch sessions, one flex anchor per slot.  The code in this file
   treats a slot whose flex block has size 0 as unclaimed. */
httpfetch_t *httpfetch[MAXSESS];

/* Number of slots that are actually scanned; set by httpfetch_init(), which
   clamps the requested value to 1..MAXSESS. */
static int httpfetch_maxsess;
/* Number of sessions whose poll flag is set; adjusted by httpfetch_setpoll()
   and httpfetch_removepoll(), read by httpfetch_poll(). */
static int httpfetch_requirepolling = 0;

/* we have two functions? one to call on poll, one to call on new data.
   we mark sockets waiting for data in fdset. we keep a count of those
     waiting for polling.
   Then, when we poll (or get an internet event? ):
     we select()
     if (select() or (no_poll > 0))
     loop (for all fetches)
     {
       save state.
       need polling?  yes => poll
       socket select? yes => newdata
       has state changed? yes => poll / newdata ?????
     }
*/

#include "kernel.h"
#include "swis.h"


/* Non-zero once a stop has been requested.  While it is set httpfetch_kick()
   starts no further urls; httpfetch_poll() clears it again when it finds no
   session working. */
static int httpfetch_stopping = 0;


/* Ask for fetching to stop.  This only raises the flag above - no socket is
   closed and no session is aborted here. */
void httpfetch_stop(void)
{
  httpfetch_stopping = 1;
}

/* Abort every session: http_abort() is called, in slot order, for each slot
   whose flex block currently has storage claimed (size > 0). */
void httpfetch_abort( void )
{
  int slot = 0;

  while (slot < httpfetch_maxsess)
  {
    int claimed = flex_size( (flex_ptr) &httpfetch[slot] ) > 0;

    if (claimed)
      http_abort( slot );

    slot++;
  }
}

/* Service all fetch sessions once, without blocking.
 *
 *  - Works out whether any claimed session is busy (i.e. not idle).
 *  - If none is, closes the socket of every claimed session that still has
 *    one open, and clears the "stopping" flag as there is nothing left to stop.
 *  - Runs select() with a zero timeout on copies of the master fd sets.
 *  - Calls http_pollsess() for each busy session whose socket is ready or
 *    whose poll flag is set; if select() failed, every busy session is polled
 *    with the error flag set.
 *  - An idle session reaching that point has its socket closed if select()
 *    flagged it, and its poll flag dropped.
 *  - Finally refreshes the status display via status_update().
 *
 * Returns 1 if at least one session was busy on entry, otherwise 0.
 */
int httpfetch_poll(void)
{
  fd_set readfds,
      writefds;
  int numready, n;
  struct timeval timeout;
  int working=0;

  /* is anything still being fetched? */
  for (n=0; n<httpfetch_maxsess; n++)
  {
    if ( flex_size( (flex_ptr) &httpfetch[n]) > 0 && httpfetch[n]->idle == 0 )
      working=1;
  }

  if ( working == 0 )
  {
    /* nothing is fetching, so no socket needs to stay open */
    for (n=0; n<httpfetch_maxsess; n++)
    {
      httpfetch_t *x = httpfetch[n];
      if ( flex_size( (flex_ptr) &httpfetch[n]) > 0 &&  x->sock != -1 )
        fdset_close( &x->sock );
    }
    httpfetch_stopping = 0; /* must be stopped now */
  }

  if ( fd_maxfdno < 0 && httpfetch_requirepolling <= 0 )
    return working; /* nothing to do, but still working */

  memcpy(&readfds, &fd_readfds, sizeof(fd_set));  /* copy master fd sets into
                                                    local copies */
  memcpy(&writefds, &fd_writefds, sizeof(fd_set));
  timerclear(&timeout); /* must do this, it may get corrupted by select() */

  numready = select(fd_maxfdno + 1, &readfds, &writefds, NULL, &timeout);

  if ( numready == 0 && httpfetch_requirepolling <= 0 )
    return working;  /* nothing to do , but still working */

#ifdef BE_SERVER
  debug_printf(("Checking server_socket\n"));
  if ( server_socket >= 0 && FD_ISSET( server_socket, &readfds ) )
    server_connection();
#endif

  debug_printf(("Searching httpfetch\n"));

  for (n=0; n<httpfetch_maxsess; n++)
  {
    httpfetch_t *x = httpfetch[n];
    if ( flex_size( (flex_ptr) &httpfetch[n]) > 0 )
    {
      /* there are only 3 things we could be doing:
         i) waiting for a resolve
         ii) waiting for connect to finish
         iii) waiting for a response
***      iv) waiting to be able to send a request - move socket to writefds?
       */
      if (  (x->sock!=-1 && numready>0 && (FD_ISSET(x->sock, &writefds) || FD_ISSET(x->sock, &readfds)))
           || numready<0 || x->poll  )
      {
        if (x->idle)
        {
          /* an idle session might be left with a persistent socket - close socket when
             other end closes the connection. */
          if (x->sock!=-1 && (FD_ISSET(x->sock,&readfds) || FD_ISSET(x->sock,&writefds)) )
            fdset_close(&x->sock);
          if (x->poll)
          {
            /* drop the flag and the counter together, exactly as
               httpfetch_removepoll() does; clearing only the flag would leave
               httpfetch_requirepolling permanently too high and defeat the
               "nothing to do" early returns above */
            x->poll=0;
            httpfetch_requirepolling-=1;
          }
        }
        else
        /* either we have a ready socket, there's a broken one somewhere, or we've got an active dnsquery */
          http_pollsess(n, numready<0); /* last field true if we need to check for an error? */
      }
    }
  }

#ifdef BE_SERVER
  debug_printf(("Searching serversess\n"));

  for (n=0; n<server_maxsess; n++)
  {
    serversess_t *x = serversess[n];
    if ( flex_size( (flex_ptr) &serversess[n] ) > 0 )
    {
      if ( ( x->insock != -1 && numready > 0 && (FD_ISSET(x->insock, &writefds) || FD_ISSET(x->insock, &readfds)) )
           || numready<0 )
      {
        server_poll( n );
      }
    }
  }
#endif

  status_update();

  return working; /* must still be working or summat */
}

/* Set the poll flag of session <no>, so that httpfetch_poll() services it even
   when its socket is not ready.  The count of sessions requiring polling is
   only bumped when the flag was previously clear. */
void httpfetch_setpoll(int no)
{
  httpfetch_t *sess = httpfetch[no];

  if (!sess->poll)
  {
    sess->poll = 1;
    httpfetch_requirepolling++;
  }
}

/* Clear the poll flag of session <no>.  The count of sessions requiring
   polling is only lowered when the flag was actually set. */
void httpfetch_removepoll(int no)
{
  httpfetch_t *sess = httpfetch[no];

  if (sess->poll)
  {
    sess->poll = 0;
    httpfetch_requirepolling--;
  }
}

/* Prepare the session table for use.
 *
 * A zero-length flex block is allocated for each of the MAXSESS anchors, the
 * requested session count is clamped to 1..MAXSESS and remembered, and the
 * fd set bookkeeping is initialised with fdset_init().
 *
 * Returns 1 on success, or -1 as soon as a flex_alloc() fails (in which case
 * the session count is not stored and fdset_init() is not called).
 */
int httpfetch_init(int maxsess)
{
  int slot = 0;
  int limit;

  while (slot < MAXSESS)
  {
    if (!flex_alloc( (flex_ptr) &httpfetch[slot], 0))
      return -1;
    slot++;
  }

  limit = (maxsess < 1) ? 1 : maxsess;
  if (limit > MAXSESS)
    limit = MAXSESS;

  httpfetch_maxsess = limit;

  fdset_init();

  return 1;
}


/* Takes a url from one of the url lists, copies it and its settings into
 * session <no>, then calls http_init() to start it fetching.
 *
 *   no     session slot to use; it must be unclaimed or idle
 *   where  the list the url came from (stored in the session)
 *   url    the url entry; its text is expected directly after the url_t.
 *          It is marked as fetching here and must be treated as invalid by
 *          the caller afterwards, as flex may move it.
 *
 * Returns  0  session started
 *         -1  the session is already busy, or http_init() failed
 *         -2  storage for the session could not be claimed
 *
 * NOTE(review): the url is marked as fetching before the session is known to
 * have started, and the mark is not undone on the -2 / http_init() failure
 * paths - confirm that is acceptable to the callers.
 */
static int httpfetch_start(int no, URL_LISTS where, url_t *url)
{
  char urltext[URL_MAXLEN];
  int proxy;
  int filetype;
  int linkdepthtogo, act_linkdepth;
  int noinlineimages;
  int inlineframes;
  int fetchportion, imagefetchportion;
  int ruleset;
  int cursize = flex_size( (flex_ptr) &httpfetch[no] );

  if (cursize != 0 && !httpfetch[no]->idle) return -1; /* the session already exists, and isn't idle. */

  /* must copy the url *first*, otherwise flex may move it */
  *urltext=0;
  strncat( urltext, ((char *) url) + sizeof(url_t), sizeof(urltext)-1);
  /****** remember, these must be preserved in http_init too ********/
  proxy     = url->proxy;
  filetype  = url->filetype;
  debug_printf(("%d : Starting url : '%s', depth to go = %d\n",no,urltext,url->linkdepthtogo));
  linkdepthtogo = url->linkdepthtogo;
  act_linkdepth = url->act_linkdepth;
  fetchportion = url->fetchportion;
  imagefetchportion = url->imagefetchportion;
  noinlineimages = url->noinlineimages;
  inlineframes = url->inlineframes;
  ruleset      = url->ruleset;
  url->fetching = 1; /* for efficiencies sake */
  url=NULL; /* it's invalid shortly. */

  /* claim storage for this session: the structure followed by the url text */
  {
    int wewant=sizeof(httpfetch_t)+strlen(urltext)+1;

    if ( !flex_extend( (flex_ptr) &httpfetch[no], wewant ) )
    {
      debug_printf(("Flex_extend failed!!! :-( \n"));
      return -2;
    }
  }

  strcpy( ((char *) httpfetch[no])+sizeof(httpfetch_t), urltext );
  if (cursize==0)
  {
    /* a freshly claimed session: init structure contents... */
    memset( httpfetch[no], 0, sizeof(httpfetch_t) );
    httpfetch[no]->sock = -1; /* 0 is a valid socket. :) */
  }
  /* a reused (idle) session keeps its other fields, e.g. its socket */
  httpfetch[no]->where     = where;
  httpfetch[no]->proxy     = proxy;
  httpfetch[no]->filetype  = filetype;
  httpfetch[no]->linkdepthtogo = linkdepthtogo;
  httpfetch[no]->act_linkdepth = act_linkdepth;
  httpfetch[no]->fetchportion = fetchportion;
  httpfetch[no]->imagefetchportion = imagefetchportion;
  httpfetch[no]->inlineframes = inlineframes;
  httpfetch[no]->noinlineimages = noinlineimages;
  httpfetch[no]->ruleset      = ruleset;

  if ( http_init( no,0 ) < 0 )
  {
    /* TODO: what happens when this fails?  The session storage stays claimed
       and nothing here resets its state. */
    return -1;
  }

  return 0;
}

/* Recount the busy sessions into http_fetching and hand waiting urls to the
 * free ones.
 *
 * The argument is currently ignored: every session from 0 upwards is always
 * examined so that http_fetching is set from the complete table.
 *
 * For each free (unclaimed or idle) session the next url is taken from the
 * PENDING list, or failing that from the FOUND list, and started with
 * httpfetch_start().  Each successful start adds one to http_fetching and
 * takes one off http_waiting.  Once both lists have come up empty no more
 * urls are asked for, but the remaining sessions are still counted.
 *
 * While a stop is in progress nothing is started or recounted.
 *
 * Returns the number of sessions started by this call.
 */
int httpfetch_kick(int x)
{
  url_t *url;
  URL_LISTS where;
  int started = 0;
  int exhausted = 0; /* set once neither list has a url left to hand out */

  if ( httpfetch_stopping ) return 0; /* stopping - don't start fetching any more urls */

  http_fetching = 0;

  /* check all sessions, to set http_fetching */
  for (x = 0; x < httpfetch_maxsess; x++)
  {
    if ( flex_size( (flex_ptr) &httpfetch[x] ) != 0 && !httpfetch[x]->idle )
    {
      http_fetching += 1; /* this url is fetching */
      continue;
    }
    /* okay, there's a free session. is there something for it to do? */

    if ( exhausted ) continue; /* no - but later busy sessions still need counting */

    /* first check pending */
    where = PENDING;
    url = url_getnext(PENDING);

    if (!url) { where = FOUND; url = url_getnext(FOUND); }

    if (!url)
    {
      exhausted = 1;
      continue;
    }

    if ( httpfetch_start(x, where, url) == 0 )
    {
      /* success */
      http_fetching += 1;
      http_waiting -= 1;
      started++;
    }

    /* NB. url is INVALID now. */
  }
  return started;
}

/* Shut down the sockets of all sessions: every claimed session (flex block
   size > 0) that still has a socket has it closed through fdset_close(). */
void httpfetch_finalise(void)
{
  int slot = 0;

  while (slot < httpfetch_maxsess)
  {
    if ( flex_size( (flex_ptr) &httpfetch[slot] ) > 0 )
    {
      httpfetch_t *sess = httpfetch[slot];

      if ( sess->sock != -1 )
        fdset_close( &sess->sock );
    }
    slot++;
  }
}


