//SearchToHTML copyright (c) 1999 David Faden.
//The applet class files and source code are distributed as linkware...
//If you use this applet or a variant on its code,
//include a link to The Gilbert Post, 
//http://www.geocities.com/Athens/Parthenon/1911
//The Gilbert Post and David Faden take no responsibility
//for anything bad that happens as a result of using this applet.
//Please send reports of problems to gilbertnews@hotmail.com, anyway, though.
//

// Modification log:
//
// 4/12/2000 fixed a "Y2K" bug reported by several alert users...  I am not sure what 
// I was thinking when I wrote the portion of code calling Date.getYear()... Perhaps that it
// returns the decade? Anyway, in reality, getYear() returns the number of years
// since 1900. Files with modification dates beyond 1999 were listed with dates greater than
// 99 (100 for 2000).
// Note: the whole Date class is deprecated in JDK 1.1
// The code actually changed is found in HDocSearcher.java.
// 
// 4/12/2000 added code that causes the HDocSearcher's runner Thread to wait
// when it is not "doing anything." This should be more efficient than in the
// previous incarnation, where runner would sleep, then periodically wake up to 
// see if there was anything to search.
//
// Jul 12 2000 I added a kludgy method to HDocSearcher which will
// finish extracting the title from a document even if a match is
// found within the title. I had been reminded of this behavior several
// times before, but it was Danny Narayan's complaint that spurred me to action.
// See HDocSearcher.finishTitle(StringBuffer).
// Finishing the title exposed another problem: the boolean inTITLE was not
// set to false even on finding the end of a title.
//
// Jul 13 2000 I seem to be writing a lot broken sentences in this bug log.
// But that okay.
// Changed the name of the method "foundNoMatch" to "receiveNoMatch." Again,
// I think that the former name was misleading. Added two new parameters to
// deal with expanded context capabilities: leadingContextLength and
// trailingContextLength - leadingContextLength is very misleadingly named.
// I will probably change it tomorrow. The new parameters I was alluding to
// are "leadingcontextlength" and "trailingcontextlength". Not yet documented! 
// I added a new method to HDocSearcher.java: appendTrailingContext(StringBuffer)
// and changed HDocSearcher's constructor in connection with the new trailing context
// stuff.
//
// Jul 15 2000 Fixed "bugs" in HDocSearcher.java that would cause an 
// ArrayIndexOutOfBoundsException to be thrown if leadingContextLength==0. Previous to a few
// edits ago, I had required that this value be greater than zero so the code's
// assumption had been a safe one.
//
// Jul 25 2000 Fixed a bug in appendTrailingContext(StringBuffer). The fix required that
// the method not append directly from the input stream to the context (this was the source
// of the problem) so I renamed appendTrailingContext(StringBuffer) to getTrailingContext().
//
// Aug 17, 2000 Changed code to look directly for EOF and on EOF, to pass 
// '\0' to the SearchSieves rather than (char)-1. 

import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Searches one document, addressed by a URL, for a set of search strings and
 * reports the outcome to the owning {@link SearchToHTML}.
 * <br>
 * The scanning itself happens on a private "runner" Thread that is created
 * lazily by {@link #searchFor(String[], boolean, boolean)}. Between searches
 * the runner waits on an internal lock instead of polling.
 * <br>
 * Results are delivered through the parent's callbacks: <code>addInfo</code>
 * (size and modification date, sent once per instance),
 * <code>receiveTitle</code>, <code>receiveMatch</code> and
 * <code>receiveNoMatch</code>. These callbacks are invoked from the runner
 * Thread.
 */
class HDocSearcher implements Runnable {
  private static final String[] months ={"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};

  /**
   * The maximum extra length for a title.
   */
  private static final int TITLE_KILL_NUM = 300; //A completely arbitrary number.

  /**
   * Stream over the document currently being searched; null when no
   * connection is open.
   */
  private BufferedInputStream bis=null;
  private SearchToHTML parent;
  /**
   * True while a search is wanted; cleared to cancel a scan in progress.
   */
  private volatile boolean searching;
  private volatile boolean running=false;//should the main loop be running?
  private URL url;
  //if there is a horrible error such as a MalformedURLException
  //or a SecurityException refuse to search anymore...
  private volatile boolean noHorribleError;
  private int index;//our index according to parent
  /**
   * One sieve per search string; rebuilt by every call to searchFor.
   */
  private SearchSieve[] searchers;
  /**
   * Whether the size/date info has already been passed to the parent.
   */
  private boolean addedinfo=false;
  /**
   * This DocSearcher's Thread. Only read and written while holding searchLock.
   */
  private Thread runner;
  private boolean cutHTML=false;
  private boolean bexact=false;

  /**
   * Array containing characters preceding and including a match.
   * Used as a circular buffer; null when no leading context is wanted.
   */
  private char[] context;

  /**
   * The number of characters held in context, capped at
   * leadingContextLength. Equal to context.length once the buffer is full.
   */
  private int contextsize;

  /**
   * Current position in the context array (the next slot to be written).
   */
  private int contextposition;

  private SearchSieve titlefinder=new SearchSieve("<title>".toCharArray(),false);
  private SearchSieve anchorfinder=new SearchSieve("<a name=".toCharArray(),false);
  /**
   * Set once a title has been handed to the parent (or found to be empty);
   * later searches then skip title extraction.
   */
  private boolean gotTitle=false;

  //XXX! leadingContextLength _includes_ the match!
  /**
   * How many characters previous to the match should be returned.
   * Never negative.
   */
  private int leadingContextLength;

  /**
   * How many characters following the match should be returned.
   * Never negative.
   */
  private int trailingContextLength;

  /**
   * Used to synchronize search methods.
   */
  private final Object searchLock=new Object();

  /**
   * Creates a searcher for a single document. No Thread is started and no
   * connection is opened until the first call to searchFor.
   *
   * @param parent receiver of all result callbacks
   * @param url location of the document to search
   * @param index the index by which parent knows this searcher
   * @param leadingContextLength number of characters up to and including
   *        the match to report; negative values are treated as 0
   * @param trailingContextLength number of characters after the match to
   *        report; negative values are treated as 0
   */
  public HDocSearcher(SearchToHTML parent,URL url,
            int index,int leadingContextLength, int trailingContextLength) {
    this.parent=parent;
    this.url=url;
    this.index=index;
    searching=false;
    noHorribleError=true;
    //Clamp BEFORE storing: the fields (not the parameters) are what the
    //rest of the class reads, and a negative trailing length would make
    //new StringBuffer(trailingContextLength) throw.
    this.leadingContextLength = Math.max(leadingContextLength, 0);
    this.trailingContextLength = Math.max(trailingContextLength, 0);
    if (this.leadingContextLength>0) {
        context=new char[this.leadingContextLength];
    }
    else {
        context=null;
    }
    contextsize=0;
    contextposition=0;
  }

  /**
   * Cancels any scan in progress and starts a new search of the document,
   * starting the runner Thread first if it is not already running.
   * Does nothing further if a horrible error has been recorded.
   *
   * @param s the search strings; a match on any one of them ends the search
   * @param bexact passed to each SearchSieve as its "exact" flag
   * @param cutHTML whether text inside HTML tags is skipped while searching
   *        and while collecting context
   */
  public final void searchFor(String[] s, boolean bexact, boolean cutHTML) {
    stopSearch();
    synchronized (searchLock) {
        contextsize=0;
        contextposition=0;
        if (!noHorribleError) {
            return;
        }
        this.bexact=bexact;
        this.cutHTML=cutHTML;
        //It's not efficient to create new SearchSieves each time.
        //SearchSieve already provides a method, setKey(char[],boolean),
        //but I'm not sure how I'd like to implement the SearchSieve pool.
        SearchSieve[] sieves=new SearchSieve[s.length];
        for (int i=0;i<s.length;i++) {
            sieves[i]=new SearchSieve(toLowerCaseChars(s[i]),bexact);
        }
        searchers=sieves;
        searching=true;
        if (!running) {
          running=true;
          runner=new Thread(this,"DocSearcher"+index+" runner");
          runner.start();//start runner
        }
        //We hold searchLock here, so notify() cannot throw
        //IllegalMonitorStateException.
        searchLock.notify();
    }
  }

  /**
   * Lower-cases a key one char at a time with Character.toLowerCase(char),
   * i.e. with exactly the same, locale-independent mapping that dosearch()
   * applies to document characters. String.toLowerCase() depends on the
   * default locale and could produce keys that never match.
   *
   * @param s the key to convert
   * @return the lower-cased characters of s
   */
  private static char[] toLowerCaseChars(String s) {
    char[] chars=s.toCharArray();
    for (int i=0;i<chars.length;i++) {
        chars[i]=Character.toLowerCase(chars[i]);
    }
    return chars;
  }

  //do not use this method
  //it is a crutch to deal with a problem in the Applet's init
  /**
   * Marks this searcher as errored so that it refuses further searches.
   */
  public final void setErrored() {
    noHorribleError=false;
  }

  /**
   * @return true if a horrible error has been recorded for this searcher.
   */
  public final boolean isErrored() {
    return !noHorribleError;
  }

  /**
   * Asks a scan in progress to stop. Does not block: the runner notices the
   * cleared flag the next time it is about to read a character.
   */
  public final void stopSearch() {
    searching=false;
  }

  /**
   * Cancels any scan in progress and lets the runner Thread terminate.
   * A later call to searchFor starts a fresh runner.
   */
  public final void stopRunning() {
    stopSearch();
    synchronized (searchLock) {
        running=false;
        //runner will start running once
        //the current Thread releases searchLock
        searchLock.notify();
    }
  }

  /**
   * Main loop of the runner Thread: performs a search whenever one has been
   * requested and otherwise waits on searchLock. The lock is held while
   * scanning and released only while waiting.
   * <br>
   * The loop ends when stopRunning() is called, when a horrible error is
   * recorded, when the Thread is interrupted, or when another Thread has
   * replaced this one as runner.
   */
  public final void run() {
    synchronized (searchLock) {
        //The runner check retires a Thread that was told to stop but did not
        //wake up before a subsequent searchFor() installed its replacement.
        while (running && noHorribleError && runner==Thread.currentThread()) {
           if (searching) {
             dosearch();
             searching=false;
             //Re-test the loop condition before waiting: dosearch() may
             //have recorded a horrible error.
             continue;
           }
           try {
               searchLock.wait();
           }
           catch (InterruptedException ie) {
               //Treat interruption as a request to shut down; keep the
               //interrupt status and let searchFor() start a new runner.
               running=false;
               Thread.currentThread().interrupt();
               return;
           }
        }
    }
  }

  /**
   * Opens the connection to url and wraps its input in bis. The first time
   * through, also passes the document's size (in k) and last-modified date
   * to the parent.
   *
   * @return true if bis is ready for reading; false if the connection could
   *         not be opened, in which case the parent has already been sent
   *         receiveNoMatch
   */
  private boolean openConnection() {
    try {
       URLConnection uc=url.openConnection();
       if (!addedinfo) {
         addedinfo=true;
         //GregorianCalendar in the default time zone yields the same
         //fields as the deprecated Date.getMonth()/getDate()/getYear().
         Calendar modified=new GregorianCalendar();
         modified.setTime(new Date(uc.getLastModified()));
         int length=uc.getContentLength()/1024;
         parent.addInfo(index,length+"k,  "+months[modified.get(Calendar.MONTH)]+" "+
                              modified.get(Calendar.DAY_OF_MONTH)+" "+modified.get(Calendar.YEAR));
       }
       bis=new BufferedInputStream(uc.getInputStream());
    }
    catch(FileNotFoundException fnfe) {
      return openFailed(fnfe,true);
    }
    catch(IOException e) {
      return openFailed(e,false);
    }
    catch(SecurityException se) {
      return openFailed(se,true);
    }
    return true;
  }

  /**
   * Common clean-up for a failed openConnection(): prints the problem,
   * closes whatever was opened and tells the parent there is no match.
   *
   * @param problem the exception that was caught
   * @param horrible whether to refuse all further searches
   * @return always false, for the caller to return
   */
  private boolean openFailed(Exception problem, boolean horrible) {
    System.out.println(problem);
    closeConnection();
    if (horrible) {
        noHorribleError=false;
    }
    parent.receiveNoMatch(index);
    return false;
  }

  /**
   * Closes bis if it is open. bis is set to null even if closing fails, so
   * a broken stream is never reused.
   */
  private void closeConnection() {
    BufferedInputStream stream=bis;
    bis=null;
    if (stream==null) {
        return;
    }
    try {
        stream.close();
    }
    catch (IOException e) {
        System.out.println(e);
    }
  }

  //dosearch should only be called from active run
  //so as to take advantage of multithreading.
  /**
   * Scans the document one byte at a time until a search string matches,
   * the end of the document is reached, or the search is cancelled.
   * <br>
   * Along the way it tracks the most recent <code>&lt;a name=</code> anchor
   * and extracts the <code>&lt;title&gt;</code>. On a match the parent gets
   * receiveMatch with the anchor and the HTML-escaped context (the match
   * wrapped in <code>&lt;b&gt;</code>); otherwise it gets receiveNoMatch.
   * The connection is closed before returning.
   */
  private void dosearch() {
    char c,lowerc;
    int b;
    boolean inTag=false;
    boolean inANCHOR=false;
    String newestanchor="";
    boolean inTITLE=false;
    StringBuffer curranchor=new StringBuffer();
    StringBuffer title=new StringBuffer();
    anchorfinder.reset();
    titlefinder.reset();
    contextsize=0;
    contextposition=0;
    if (!openConnection()) {
        return;//problems taken care of by openConnection()
    }
    for (;;) {
        if (!searching) {
            break;
        }
        try {
          b = bis.read();
          //EOF is passed on to the sieves as '\0' rather than (char)-1.
          if (b == -1) {
             c = '\0';
          }
          else {
             c = (char) b;
          }
        }
        catch(IOException e67) {
          System.out.println(e67);
          parent.receiveNoMatch(index);
          closeConnection();
          return;
        }
        lowerc=Character.toLowerCase(c);

        //Anchor tracking: after "<a name=" collect the name up to the next
        //quote, whitespace, '>' or EOF. A delimiter seen before any name
        //character (the opening quote) is simply skipped.
        if (!inANCHOR) {
            inANCHOR=anchorfinder.addChar(lowerc);
        }
        else if (lowerc=='\"' || lowerc=='\'' || lowerc==' ' || lowerc=='>' ||
                 lowerc=='\n' || lowerc=='\r' || lowerc=='\t' || c == '\0') {
            if (curranchor.length()>0) {
              newestanchor=curranchor.toString();
              inANCHOR=false;
            }
            curranchor.setLength(0);
        }
        else {
            curranchor.append(c);
        }

        //Title extraction: after "<title>" collect up to the next '<' or EOF.
        if (!gotTitle) {
          if (!inTITLE) {
             inTITLE=titlefinder.addChar(lowerc);
          }
          else if (lowerc=='<' || c == '\0') {
             if (title.length()>0) {
                 parent.receiveTitle(title.toString(),index);
             }
             gotTitle=true;
             inTITLE=false;
             title.setLength(0);
          }
          else {
             title.append(c);
          }
        }

        //XXX! JavaScripts and comments might louse up this
        //current cheesy method for cutting HTML tags.
        if (lowerc=='<') {
            inTag=true;
        }
        else if (lowerc=='>') {
            if (inTag && cutHTML) {
                inTag=false;
                continue;//the closing '>' belongs to the tag being cut
            }
            inTag=false;
        }

        if (!(cutHTML && inTag)) {
          if (leadingContextLength>0) {
             //remember c in the circular context buffer
             if (contextsize<leadingContextLength) {
                 contextsize++;
             }
             context[contextposition]=c;
             contextposition++;
             contextposition%=context.length;
          }
          for (int i=0;i<searchers.length;i++) {
             if (searchers[i].addChar(lowerc)) {
               StringBuffer contextBuffer=buildMatchContext(searchers[i],c,b==-1);

               if (inTITLE) {
                   //The match fell inside the title. Read ahead to the end
                   //of the title, then rewind, so that the characters are
                   //still available for the trailing context (and the
                   //trailing context does not eat part of the title).
                   //finishTitle reads at most TITLE_KILL_NUM bytes.
                   bis.mark(TITLE_KILL_NUM);
                   finishTitle(title);
                   try {
                       bis.reset();
                   }
                   catch (IOException ioe) {
                       ioe.printStackTrace();
                   }
                   parent.receiveTitle(title.toString(), index);
                   gotTitle=true;//as on the normal end-of-title path
               }

               String trailingContext = getTrailingContext();
               contextBuffer.append(SearchToHTML.makeHTMLSafe(trailingContext));

               if (searching) {
                 parent.receiveMatch(index,newestanchor,contextBuffer.toString());
               }
               closeConnection();
               return;
             }
          }
        }
        if (b == -1) {//EOF
            break;
        }
    }
    //tell the parent the bad news
    parent.receiveNoMatch(index);
    closeConnection();
  }

  /**
   * Builds the part of the reported context that is already known when a
   * sieve matches: the escaped leading context with the match wrapped in
   * <code>&lt;b&gt;</code>. Reads nothing from the stream.
   * <br>
   * For "exact" searches the last character fed to the sieve is not
   * part of the key, so it is taken off the end of the leading context and
   * appended after the closing tag instead.
   *
   * @param matched the sieve that reported the match
   * @param c the last character read (the one that completed the match)
   * @param atEOF true if c is the '\0' standing in for end of file
   * @return a buffer to which the trailing context can be appended; empty
   *         if no leading context is kept
   */
  private StringBuffer buildMatchContext(SearchSieve matched, char c, boolean atEOF) {
    if (leadingContextLength<=0) {
        return new StringBuffer(trailingContextLength);
    }
    String contextstr=getContext();
    int len=matched.getKeyLength();
    if (bexact) {
        contextstr=contextstr.substring(0,contextstr.length()-1);
    }
    int contextLen = contextstr.length();
    if (len>contextLen) {
        len=contextLen;//the key is longer than the context we kept
    }
    StringBuffer contextBuffer = new StringBuffer(contextLen+trailingContextLength+7);
    contextBuffer.append(SearchToHTML.makeHTMLSafe(contextstr.substring(0,contextLen-len)));
    contextBuffer.append("<b>");
    contextBuffer.append(SearchToHTML.makeHTMLSafe(contextstr.substring(contextLen-len)));
    contextBuffer.append("</b>");
    //XXX! For "exact" searches, the "leading" context will
    //be one character too short and the "trailing" one too long.
    if (bexact && !atEOF) {
        //Escaped like the rest of the context; the EOF marker is not
        //document text and is left out.
        contextBuffer.append(SearchToHTML.makeHTMLSafe(String.valueOf(c)));
    }
    return contextBuffer;
  }

  //XXX! Hack attack... uggh.
  /**
   * Read the rest of the title into titleBuffer: everything up to the next
   * '<' or EOF, but at most TITLE_KILL_NUM bytes.
   * <br>
   * This method assumes that the connection is still open.
   * The calling method is responsible for broadcasting the
   * title and for cleaning up the connection.
   *
   * @param titleBuffer StringBuffer to append the rest of the title to.
   */
  private void finishTitle(StringBuffer titleBuffer) {
    int b;
    for (int numRead=0;numRead<TITLE_KILL_NUM;numRead++) {
        try {
            b=bis.read();
        }
        catch (IOException ioe) {
            ioe.printStackTrace();
            return;
        }
        if (b=='<' || b==-1) {//end of title or EOF
            return;
        }
        titleBuffer.append((char)b);
    }
  }

  /**
   * Get up to <code>trailingContextLength</code> chars from the document,
   * fewer if EOF or a read error comes first. When cutHTML is set, tags
   * are skipped and do not count towards the length.
   * <br>
   * This method assumes that the connection is still open.
   * Closing the connection is left to the caller as well.
   *
   * @return The trailing context (not HTML-escaped).
   */
  private String getTrailingContext() {
    int b;
    int lenAppended=0;
    boolean inTag=false;
    StringBuffer contextBuffer = new StringBuffer(trailingContextLength);
    while (lenAppended<trailingContextLength) {
        try {
            b = bis.read();
        }
        catch (IOException ioe) {
            ioe.printStackTrace();
            break;
        }
        if (b==-1) {//EOF
            break;
        }
        if (b=='<') {
            inTag=true;
        }
        else if (b=='>') {
            boolean closedTag=inTag;
            inTag=false;
            //Only a '>' that actually closes a tag is cut, as in dosearch();
            //a stray '>' is ordinary text.
            if (cutHTML && closedTag) {
                continue;
            }
        }
        if (cutHTML && inTag) {
            continue;
        }
        contextBuffer.append((char)b);
        lenAppended++;
    }
    return contextBuffer.toString();
  }

  /**
   * Returns the last leadingContextLength chars to pass through
   * the SearchSieves to give more of an idea to the user of what
   * was around a match.
   * <br>
   * This is not set up to deal with the case that context is still changing as
   * it is asked for or concurrent calls to getContext().
   *
   * @return The context of the match, oldest character first; "" if no
   *         leading context is kept.
   */
  private String getContext() {
    if (leadingContextLength<=0) {
        return "";
    }
    if (contextsize!=context.length) {
        //buffer not yet full: it has not wrapped, so it starts at 0
        return new String(context,0,contextposition);
    }
    if (contextposition==0) {
        return new String(context);//already in the right order
    }
    //full and wrapped: the oldest character sits at contextposition
    return new String(context,contextposition,context.length-contextposition) +
           new String(context,0,contextposition);
  }

}
