/*
 * Copyright 2006 TheLadders.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/**
 * client-side indexing and searching.
 * 
 * @author Larry Ogrodnek <larry@theladders.com>
 */

// kind of hacky, might be better to have a conventional namespace include ala yui.
try
{
  eval("LADDERS");
}
catch(e)
{
  LADDERS = {};
}
    
LADDERS.search = {};

LADDERS.search =
{
    /**
     * Search Index.
	 *     
     * @constructor
     */
	index: function()
	{
	  this._index = {};
	  this._analyzer = new LADDERS.search.StandardAnalyzer();
	},

	/**
	 * A document to be indexed.
	 *
	 * @constructor
	 */
	document:function()
	{
		this._fields = {};
	},

    /**
     * Search hit.
     *
     * @constructor
     */
	hit: function()
	{
	  this._matchedFields = 0;
	  this._score = 0;
	},

    /**
     * StandardAnalyzer
     * @constructor
     */
	StandardAnalyzer: function()
	{
	},

	/**
	 * Default list of stop words.  These words will be removed from
	 * both indexed text and query strings.
	 */
	STOP_WORDS:
	[
	  "a", "an", "and", "are", "as", "at", "be", "but",
  	  "by", "for", "if", "in", "into", "is", "it", "no",
	  "no", "not", "of", "on", "or", "s", "such", "t",
	  "that", "the", "their", "they", "then", "there", 
  	  "these", "this", "to", "was", "will", "with"
	]
};


/**
 * Initialize stop words.
 */
LADDERS.search.StandardAnalyzer.prototype._stopWords = function(stopWords)
  {
    var result = {};
	  for (var i=0; i< stopWords.length; i++)
	  {
	    result[stopWords[i]] = 1;
	  }
	  return result;
  }(LADDERS.search.STOP_WORDS);


/**
 * Converts a string into an array of tokens, removing stopwords and punctuation.
 */
LADDERS.search.StandardAnalyzer.prototype.tokens = function(s)
{
  var results = new Array();

  var words = s.toLowerCase().replace(/-/g, " ").split(/\s+/);

  for (var i=0; i< words.length; i++)
  {
    var w = words[i].replace(/[,\.'"-]+/g, "");
	  
	if (w.length > 0 && (! this._stopWords[w]))
	{
	  results[results.length] = w;
	}
  }

  return results;
}



/**
 * Add a document to the index.
 *
 */
LADDERS.search.index.prototype.addDocument = function(d)
{
  for (var f in d._fields)
  {
    // only index string fields
    if (typeof d._fields[f] == 'string')
	{
	  var words = this._analyzer.tokens(d._fields[f]);

	  for (var i=0; i< words.length; i++)
	  {
	    var word = words[i];

	    var id = d._fields.id;	    
	    this._markOccurance(id, word);
      }  
    }
  }  
};

LADDERS.search.index.prototype._getOccurances = function(word)
{
  var c = word.charAt(0);
  
  if (! this._index[c])
  {
     this._index[c] = {};
  }
  
  if (! this._index[c][word])
  {
    this._index[c][word] = {};
  }
  
  return this._index[c][word];
};

LADDERS.search.index.prototype._markOccurance = function(id, word)
{
  var a = this._getOccurances(word);
  
  if (! a[id])
  {
    a[id] = 1;
  }
  else
  {
    a[id]++;
  }
};

/**
 * Index search.  
 *
 */
LADDERS.search.index.prototype.search = function(query)
{
  var words = this._analyzer.tokens(query);

  var results = {};

  var findWild = /(.+)\*$/;
		
  for (var i=0; i< words.length; i++)
  {
    var w;

	var match = words[i].match(findWild);
	
	// we have a keyword with a wildcard  
	if (match != null)
    {
	  w = this._escape(match[1]);

	  var wr = this.searchWildcard(w);
		   
      for (var j=0; j< wr.length; j++)
	  {
	    // new document match
	    if (! results[wr[j]])
	    {
	      results[wr[j]] = new LADDERS.search.hit();
	    }
			
  	    // increase document score		
		results[wr[j]]._matchedFields++;
		results[wr[j]]._score += 1;
      }

	  continue;
    }

    // we have a non-wildcard keyword
	w = this._escape(words[i]);
	  
	// in index?
	var docs = this._getOccurances(w);
	
	if (docs)
	{
	  for (var d in docs)
	  {
	    // new document match
	    if (! results[d])
	    {
	      results[d] = new LADDERS.search.hit();
	    }

		//increase document score
		results[d]._matchedFields++;
		results[d]._score += docs[d];
	  }
	}
  }

  var ar = new Array();
  
  // AND match, all words must match
  var needed = words.length;
  
  for (var r in results)
  {
    var hit = results[r];

	if (hit._matchedFields == needed)
	{
	  ar[ar.length] = r;
    }
  }

  return ar;
};


/**
 * Perform a "starts-with" match against the index.
 */
LADDERS.search.index.prototype.searchWildcard = function(word)
{
  var pattern = new RegExp(word.toLowerCase() + ".*", "g");

  var results = {};

  // go through all words in the index...
  
  var possible = this._index[word.charAt(0)];
  
  for (var k in possible)
  {
    if (pattern.test(k))
    {
	  for (var d in possible[k])
      {
        // always mark as one hit
	    results[d] = 1;
      }
    }
  }

  var ar = new Array();
  for (d in results)
  {
    ar[ar.length] = d;
  }	  

  return ar;
}

/**
 * escape our strings.
 */
LADDERS.search.index.prototype._escape =
  function(word)
  {
	  var quote = /([\\\!\@\#\$\%\^\&\.\+\?])/g;
	  return word.replace(quote, "\\$1");
  }
    
/**
 * Add a fieldName, fieldText pair to the document.
 *
 * Adding the same field more than once erases the previous contents.
 *
 */
LADDERS.search.document.prototype.add = 
	function(k, v)
	{
		this._fields[k] = v;
	};
  
/**
 * Get the fieldText for a given fieldName from a document.
 *
 */    
LADDERS.search.document.prototype.get =
	function(k)
	{
		return this._fields[k];
	};

