Package org.fao.geonet.languages

Source Code of org.fao.geonet.languages.LanguageDetector

//===  Copyright (C) 2012 Food and Agriculture Organization of the
//===  United Nations (FAO-UN), United Nations World Food Programme (WFP)
//===  and United Nations Environment Programme (UNEP)
//===
//===  This program is free software; you can redistribute it and/or modify
//===  it under the terms of the GNU General Public License as published by
//===  the Free Software Foundation; either version 2 of the License, or (at
//===  your option) any later version.
//===
//===  This program is distributed in the hope that it will be useful, but
//===  WITHOUT ANY WARRANTY; without even the implied warranty of
//===  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//===  General Public License for more details.
//===
//===  You should have received a copy of the GNU General Public License
//===  along with this program; if not, write to the Free Software
//===  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//===
//===  Contact: Jeroen Ticheler - FAO - Viale delle Terme di Caracalla 2,
//===  Rome - Italy. email: GeoNetwork@fao.org
//==============================================================================
package org.fao.geonet.languages;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import jeeves.server.context.ServiceContext;
import org.fao.geonet.utils.Log;
import org.apache.commons.lang.StringUtils;
import org.fao.geonet.constants.Geonet;
import org.fao.geonet.kernel.DataManager;

/**
* Automatic language detector using http://code.google.com/p/language-detection/.
*
* @author heikki doeleman
*/
public class LanguageDetector {
   
    private static LanguageDetector instance;

    private static boolean profilesLoaded = false;
    private static boolean languageLevelSupported = false;
    private static String upgradeMessage;

    private LanguageDetector() {}
   
    static {
        String javaVersion = System.getProperty("java.version");
        // java < 1.6 not supported
        if(StringUtils.isNotEmpty(javaVersion) && javaVersion.startsWith("1.5")) {
            upgradeMessage = "You are running on Java " + javaVersion + ", auto-detecting languages is disabled. Upgrade to at least 1.6.";
            Log.warning(Geonet.LANGUAGEDETECTOR, LanguageDetector.upgradeMessage);
        }   
        else {
            LanguageDetector.languageLevelSupported = true;
        }
    }

    /**
     * TODO javadoc.
     *
     * @return instance
     * @throws Exception hmm
     */
    public static synchronized LanguageDetector getInstance() throws Exception {
        if(!LanguageDetector.languageLevelSupported) {
            throw new Exception(LanguageDetector.upgradeMessage);
        }
        if(instance == null) {
            instance = new LanguageDetector();
        }
        return instance;
    }

    /**
     * Helps ensure singleton-ness.
     *
     * @return nothing
     * @throws CloneNotSupportedException
     */
    @Override
    public Object clone() throws CloneNotSupportedException {
        throw new CloneNotSupportedException();
    }

    /**
     * Detects language of input string.
     *
     *
     * @param srvContext
     * @param input text to analyze
     * @return iso 639-2 code of detected language
     * @throws Exception hmm
     */
    public String detect(ServiceContext srvContext, String input) throws Exception {
        if(!LanguageDetector.languageLevelSupported) {
            throw new Exception(LanguageDetector.upgradeMessage);
        }
        Detector detector = DetectorFactory.create();
        detector.append(input);
        String detectedLanguage = detector.detect()
        // this is to deal with zh-cn and zh-tw in languageprofiles. All other files have a 639-1 2-char filename.
        if(detectedLanguage.length() > 2) {
            detectedLanguage = detectedLanguage.substring(0, 2);
        }
        String iso639_2 = srvContext.getBean(IsoLanguagesMapper.class).iso639_1_to_iso639_2(detectedLanguage);
        Log.debug(Geonet.LANGUAGEDETECTOR,
                    "detected language: " + iso639_2 +
                    " for text:" + input);

        return iso639_2;
    }


    /**
     * Creates mapping to ISO 639-2 for all languages supported by this language detector.
     *
     *
     *
     * @param path path to profiles directory
     * @throws Exception hmm
     */
    public static void init(String path) throws Exception {
        if(!LanguageDetector.languageLevelSupported) {
            throw new Exception(LanguageDetector.upgradeMessage);
        }
        if(!LanguageDetector.profilesLoaded) {
            //
            // initialize DetectorFactory. NOTE this can only happen once, otherwise an exception is thrown.
            //
            DetectorFactory.loadProfile(path);
            LanguageDetector.profilesLoaded = true;
        }
    }
}
TOP

Related Classes of org.fao.geonet.languages.LanguageDetector

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.