Source Code of com.orientechnologies.orient.core.index.OIndexFullText

/*
 * Copyright 1999-2010 Luca Garulli (l.garulli--at--orientechnologies.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.orientechnologies.orient.core.index;


import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


import com.orientechnologies.orient.core.db.record.ODatabaseRecord;
import com.orientechnologies.orient.core.db.record.OIdentifiable;
import com.orientechnologies.orient.core.db.record.ORecordElement;
import com.orientechnologies.orient.core.db.record.ORecordLazySet;
import com.orientechnologies.orient.core.metadata.schema.OType;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.core.serialization.serializer.OStringSerializerHelper;


/**
 * Fast index for full-text searches.
 * 
 * @author Luca Garulli
 * 
 */
public class OIndexFullText extends OIndexMVRBTreeAbstract {
  private static final String  CONFIG_STOP_WORDS    = "stopWords";
  private static final String  CONFIG_IGNORE_CHARS  = "ignoreChars";


  private static String        DEF_CLUSTER_NAME    = "FullTextIndex";
  private static String        DEF_IGNORE_CHARS    = " \r\n\t:;,.|+*/\\=!?[]()'\"";
  private static String        DEF_STOP_WORDS      = "the in a at as and or for his her " + "him this that what which while "
                                                      + "up with be was is";
  private String              ignoreChars          = DEF_IGNORE_CHARS;
  private Set<String>          stopWords;


  public OIndexFullText() {
    super("FULLTEXT");
    stopWords = new HashSet<String>(OStringSerializerHelper.split(DEF_STOP_WORDS, ' '));
  }


  public OIndexFullText(final String iName, final ODatabaseRecord iDatabase, final int[] iClusterIdsToIndex,
      final boolean iAutomatic) {
    this(iName, OType.STRING, iDatabase, DEF_CLUSTER_NAME, iClusterIdsToIndex, iAutomatic);
  }


  public OIndexFullText(final String iName, final OType iKeyType, final ODatabaseRecord iDatabase, final String iClusterIndexName,
      final int[] iClusterIdsToIndex, final boolean iAutomatic) {
    this();
    create(iName, OType.STRING, iDatabase, iClusterIndexName, iClusterIdsToIndex, null, iAutomatic);
  }


  /**
   * Index an entire document field by field and save the index at the end.
   * 
   * @param iDocument
   *          The document to index
   */
  public void indexDocument(final ODocument iDocument) {
    Object fieldValue;


    for (String fieldName : iDocument.fieldNames()) {
      fieldValue = iDocument.field(fieldName);
      put(fieldValue, iDocument);
    }


    acquireExclusiveLock();


    try {
      map.save();
    } catch (IOException e) {
      throw new OIndexException("Can't save index entry for document '" + iDocument.getIdentity() + "'");
    } finally {
      releaseExclusiveLock();
    }
  }


  /**
   * Indexes a value and save the index. Splits the value in single words and index each one. Save of the index is responsibility of
   * the caller.
   * 
   * @param iDocument
   *          The document to index
   */
  public OIndex put(final Object iKey, final OIdentifiable iSingleValue) {
    if (iKey == null)
      return this;


    Set<OIdentifiable> refs;
    final StringBuilder buffer = new StringBuilder();
    char c;
    boolean ignore;


    // GET ALL THE WORDS OF THE STRING
    final List<String> words = OStringSerializerHelper.split(iKey.toString(), ' ');


    // FOREACH WORD CREATE THE LINK TO THE CURRENT DOCUMENT
    for (String word : words) {
      buffer.setLength(0);


      for (int i = 0; i < word.length(); ++i) {
        c = word.charAt(i);
        ignore = false;
        for (int k = 0; k < ignoreChars.length(); ++k)
          if (c == ignoreChars.charAt(k)) {
            ignore = true;
            break;
          }


        if (!ignore)
          buffer.append(c);
      }


      word = buffer.toString();


      // CHECK IF IT'S A STOP WORD
      if (stopWords.contains(word))
        continue;


      checkForOptimization();
      acquireExclusiveLock();


      try {
        // SEARCH FOR THE WORD
        refs = map.get(word);
        checkForOptimization();
        if (refs == null)
          // WORD NOT EXISTS: CREATE THE KEYWORD CONTAINER THE FIRST TIME THE WORD IS FOUND
          refs = new ORecordLazySet(configuration.getDatabase()).setRidOnly(true);


        // ADD THE CURRENT DOCUMENT AS REF FOR THAT WORD
        refs.add(iSingleValue);


        // SAVE THE INDEX ENTRY
        map.put(word, refs);


      } finally {
        releaseExclusiveLock();
      }
    }
    return this;
  }


  public boolean remove(final Object iKey, final OIdentifiable value) {
    checkForOptimization();
    acquireExclusiveLock();
    try {


      final Set<OIdentifiable> recs = get(iKey);
      if (recs != null && !recs.isEmpty()) {
        if (recs.remove(value)) {
          map.put(iKey, recs);
          return true;
        }
      }
    } finally {
      releaseExclusiveLock();
    }
    return false;
  }


  @Override
  public ODocument updateConfiguration() {
    super.updateConfiguration();
    configuration.setInternalStatus(ORecordElement.STATUS.UNMARSHALLING);


    try {
      configuration.field(CONFIG_IGNORE_CHARS, ignoreChars);
      configuration.field(CONFIG_STOP_WORDS, stopWords);


    } finally {
      configuration.setInternalStatus(ORecordElement.STATUS.LOADED);
    }
    return configuration;
  }
}
Source Code of com.orientechnologies.orient.core.index.OIndexFullText

Related Classes of com.orientechnologies.orient.core.index.OIndexFullText