Package org.opensextant.solrtexttagger

Examples of org.opensextant.solrtexttagger.TaggerFstCorpus


     * @param fstInfo the info about the corpus
     * @return the TaggerFstCorpus
     * @throws CorpusException if the requested corpus is currently not available
     */
    private TaggerFstCorpus obtainFstCorpus(Long indexVersion, CorpusInfo fstInfo) throws CorpusException {
        TaggerFstCorpus fstCorpus;
        synchronized (fstInfo) { // one at a time
            fstCorpus = fstInfo.getCorpus();
            if (fstCorpus == null) {
                if (fstInfo.isEnqueued()) {
                    throw new CorpusException("The FST corpus for language '"
                            + fstInfo.language + "' is enqueued for creation, but not yet "
                            + "available. Try at a  later point in time", null);
                }
                if (fstInfo.isFstCreationError()) {
                    throw new CorpusException(fstInfo.getErrorMessage(), null);
                }
                if (fstInfo.isFstFileError() && fstInfo.allowCreation) {
                    //try to recreate the FST corpus
                    if(config.getExecutorService() != null){
                        // TODO: this code should get moved to a CorpusManager class
                        config.getExecutorService().execute(
                            new CorpusCreationTask(config, fstInfo));
                        throw new CorpusException("The FST corpus for language '"
                                + fstInfo.language + "' was invalid and is now "
                                + "enqueued for re-creation. Retry at a  later "
                                + "point in time.", null);
                    } else {
                        throw new CorpusException(fstInfo.getErrorMessage(), null);
                    }
                }
            } else { //fstCorpus != null
                if(indexVersion != null && indexVersion.longValue() != fstCorpus.getIndexVersion()){
                    log.info("FST corpus for language '{}' is outdated ...", fstInfo.language);
                    if(fstInfo.isEnqueued()){
                        log.info("  ... already sheduled for recreation. "
                            + "Use outaded corpus for tagging");
                    } else if(fstInfo.allowCreation && config.getExecutorService() != null){
View Full Code Here


            this.enqueued = -1; //mark this one as up-to-date
        }
    }

    public TaggerFstCorpus getCorpus() {
        TaggerFstCorpus corpus = taggerCorpusRef == null ? null : taggerCorpusRef.get();
        if(corpus != null){
            //on first usage replace a WeakReference with a SoftReference
            if(taggerCorpusRef instanceof WeakReference<?>){
                taggerCorpusRef.clear();
                taggerCorpusRef = new SoftReference<TaggerFstCorpus>(corpus);
            }
        } else if(taggerCorpusRef != null){
            taggerCorpusRef = null; //reset to null as the reference was taken
        }
        if(corpus == null) {
            try { //STANBOL-1177: load FST models in AccessController.doPrivileged(..)
                corpus = AccessController.doPrivileged(new PrivilegedExceptionAction<TaggerFstCorpus>() {
                    public TaggerFstCorpus run() throws IOException {
                        if(fst.exists() && //if the file exists AND the file was not yet failing to load
                                //OR the file is newer as the last version failing to load
                                (!fstFileError || FileUtils.isFileNewer(fst, fstDate))){
                            TaggerFstCorpus corpus = TaggerFstCorpus.load(fst);
                            if(corpus != null){
                                //I need to set fstDate here, because I can not
                                //access lastModified() outside doPrivileged
                                fstDate = new Date(fst.lastModified());
                            }
View Full Code Here

        SolrCore core = indexConfig.getIndex();
        if(core.isClosed()){
            log.warn("Unable to build {} becuase SolrCore {} is closed!",fstInfo,core.getName());
            return;
        }
        final TaggerFstCorpus corpus;
        RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
        try { //STANBOL-1177: create FST models in AccessController.doPrivileged(..)
            final SolrIndexSearcher searcher = searcherRef.get();
            //we do get the AtomicReader, because TaggerFstCorpus will need it
            //anyways. This prevents to create another SlowCompositeReaderWrapper.
            final IndexReader reader = searcher.getAtomicReader();
            log.info(" ... build FST corpus for {}",fstInfo);
            corpus = AccessController.doPrivileged(new PrivilegedExceptionAction<TaggerFstCorpus>() {
                public TaggerFstCorpus run() throws IOException {
                    return new TaggerFstCorpus(reader, searcher.getIndexReader().getVersion(),
                        null, fstInfo.indexedField, fstInfo.storedField, fstInfo.analyzer,
                        fstInfo.partialMatches,1,100);
                }
            });
        } catch (PrivilegedActionException pae) {
            Exception e = pae.getException();
            if(e instanceof IOException){ //IO Exception while loading the file
                throw new IllegalStateException("Unable to read Information to build "
                        + fstInfo + " from SolrIndex '" + core.getName() + "'!", e);
            } else { //Runtime exception
                throw RuntimeException.class.cast(e);
            }
        } finally {
            searcherRef.decref(); //ensure that we dereference the searcher
        }
        if(indexConfig.isActive()){
            //set the created corpus to the FST Info
            fstInfo.setCorpus(enqueued, corpus);
            try { //STANBOL-1177: save FST models in AccessController.doPrivileged(..)
                AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {
                    public Object run() throws IOException {
                        if(fstInfo.fst.exists()){
                            if(!FileUtils.deleteQuietly(fstInfo.fst)){
                                log.warn("Unable to delete existing FST file for {}", fstInfo);
                            }
                        }
                        corpus.save(fstInfo.fst);
                        return null; //not used
                    }
                });
            } catch (PrivilegedActionException pae) {
                Exception e = pae.getException();
View Full Code Here

        this.corpusInfo = corpus;
    }
   
    @Override
    public void run() {
        TaggerFstCorpus corpus = null;
        RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
        try {
            SolrIndexSearcher searcher = searcherRef.get();
            //we do get the AtomicReader, because TaggerFstCorpus will need it
            //anyways. This prevents to create another SlowCompositeReaderWrapper.
            IndexReader reader = searcher.getAtomicReader();
            log.info(" ... build {}", corpusInfo);
            corpus = new TaggerFstCorpus(reader, searcher.getIndexReader().getVersion(),
                null, corpusInfo.indexedField, corpusInfo.storedField, corpusInfo.analyzer,
                corpusInfo.partialMatches,1,200);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to read Information to build "
                    + corpusInfo + " from SolrIndex '" + core.getName() + "'!", e);
        } finally {
            searcherRef.decref(); //ensure that we dereference the searcher
        }
        if(corpusInfo.fst.exists()){
            if(!FileUtils.deleteQuietly(corpusInfo.fst)){
                log.warn("Unable to delete existing FST fiel for {}",corpusInfo);
            }
        }
        try {
            corpus.save(corpusInfo.fst);
        } catch (IOException e) {
            log.warn("Unable to store FST corpus " + corpusInfo + " to "
                    + corpusInfo.fst.getAbsolutePath() + "!", e);
        }
    }
View Full Code Here

TOP

Related Classes of org.opensextant.solrtexttagger.TaggerFstCorpus

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.