Package org.apache.lucene.analysis

Source Code of org.apache.lucene.analysis.TestComboAnalyzer

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.analysis;

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.junit.Test;

import java.io.IOException;
import java.io.StringReader;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import static org.hamcrest.CoreMatchers.equalTo;

/**
* Testcase for {@link ComboAnalyzer}
*/
public class TestComboAnalyzer extends BaseTokenStreamTestCase {

    @Test
    public void testSingleAnalyzer() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "a", "little", "test", Integer.toString(i)},
                    new int[]{ 057, 14, 19},
                    new int[]{ 46, 13, 18, 20},
                    new int[]{ 11111});
    }

    @Test
    public void testMultipleAnalyzers() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                new KeywordAnalyzer()
        );
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just", "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                    new int[]{ 000577, 14, 14, 19, 19},
                    new int[]{ 44, 206, 13, 13, 18, 18, 20, 20},
                    new int[]{ 1001101010});
    }

    @Test
    public void testMultipleAnalyzersDeduplication() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                new KeywordAnalyzer()
        );
        cb.enableDeduplication();
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just a little test "+i, "a", "little", "test", Integer.toString(i)},
                    new int[]{ 0,   057, 14, 19},
                    new int[]{ 4206, 13, 18, 20},
                    new int[]{ 1,   01111});
    }

    @Test
    public void testThreeTimesTheSameAnalyzerInstance() throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                analyzer,
                analyzer,
                analyzer
        );
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just", "just", "a", "a", "a", "little", "little", "little", "test", "test", "test", Integer.toString(i), Integer.toString(i), Integer.toString(i)},
                    new int[]{ 00055, 5777, 14, 14, 14, 19, 19, 19},
                    new int[]{ 44466, 6, 13, 13, 13, 18, 18, 18, 20, 20, 20},
                    new int[]{ 10010, 0100100100});
    }

    @Test
    public void testCascadeCombo() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new ComboAnalyzer(TEST_VERSION_CURRENT,
                        new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                        new KeywordAnalyzer()
                ),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                new KeywordAnalyzer()
        );
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just", "just a little test "+i, "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                    new int[]{ 0000577, 14, 14, 19, 19},
                    new int[]{ 44, 20, 206, 13, 13, 18, 18, 20, 20},
                    new int[]{ 10001101010});
    }

    @Test
    public void testCascadeComboTwiceSameInstanceSolvedByCaching() throws IOException {
        Analyzer analyzer = new KeywordAnalyzer();
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT,
                new ComboAnalyzer(TEST_VERSION_CURRENT,
                        new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
                        analyzer
                ).enableTokenStreamCaching(),
                new StandardAnalyzer(TEST_VERSION_CURRENT),
                analyzer
        ).enableTokenStreamCaching();
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(cb.tokenStream("field", new StringReader("just a little test "+i)),
                    new String[]{"just", "just", "just a little test "+i, "just a little test "+i, "a", "little", "little", "test", "test", Integer.toString(i), Integer.toString(i)},
                    new int[]{ 0000577, 14, 14, 19, 19},
                    new int[]{ 44, 20, 206, 13, 13, 18, 18, 20, 20},
                    new int[]{ 10001101010});
    }

    @Test
    public void testCanUseFromNamedAnalyzer() throws IOException {
        ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        for (int i = 0 ; i < 3 ; i++)
            assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)),
                    new String[]{"just", "a", "little", "test", Integer.toString(i)},
                    new int[]{ 057, 14, 19},
                    new int[]{ 46, 13, 18, 20},
                    new int[]{ 11111});
    }

    @Test
    public void testReuseSequentialMultithreading() throws IOException, InterruptedException {
        // Create the analyzer
        final ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        final NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        // Use N threads, each running M times
        Thread[] threads = new Thread[4];
        final int runs = 4;
        // The lock ensures only one thread is running at a given time
        final Lock lock = new ReentrantLock();
        // This integer ensures each thread runs with a different input
        // Inputs must not be exchanged from one thread to another during object reuse
        final AtomicInteger sequence = new AtomicInteger(0);
        final AtomicBoolean abort = new AtomicBoolean(false);
        // The barrier ensures that each thread gets a chance to execute, for each run
        // We must use extra care so that all threads can exit as soon as one fails
        final CyclicBarrier latch = new CyclicBarrier(threads.length);
        // Code executed on each thread
        Runnable code = new Runnable() {
            @Override
            public void run() {
                // Run multiple times before quitting
                for (int run = 0 ; run < runs ; ++run) {
                    try {
                        // Serialize runs
                        lock.lock();
                        // Get unique sequence number
                        int i = sequence.getAndIncrement();
                        // Check the analysis went well, including the unique sequence number
                        assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)),
                                new String[]{"just", "a", "little", "test", Integer.toString(i)},
                                new int[]{0, 5, 7, 14, 19},
                                new int[]{4, 6, 13, 18, 19 + ("" + i).length()},
                                new int[]{1, 1, 1, 1, 1});
                    } catch (Exception e) {
                        e.printStackTrace();
                        // Make other fail,
                        abort.set(true); // if they will soon be waiting,
                        latch.reset(); // and if they are already waiting
                        // Now we can fail!
                        assertNull(e);
                    } finally {
                        lock.unlock();
                    }
                    // Wait for other threads, so calls are well interleaved between threads
                    try {
                        if (abort.get()) return;
                        latch.await();
                    } catch (Exception e) {
                        e.printStackTrace();
                        // Make other fail,
                        abort.set(true); // if they will soon be waiting,
                        latch.reset(); // and if they are already waiting
                        // Now we can fail!
                        assertNull(e);
                    }
                }
            }
        };
        // Create the threads
        for (int i = 0 ; i < threads.length ; i++)
            threads[i] = new Thread(code);
        // Start the threads
        for (int i = 0 ; i < threads.length ; i++)
            threads[i].start();
        // Wait for completion
        for (int i = 0 ; i < threads.length ; i++)
            threads[i].join();
        // Ensure all desired runs have been performed
        assertThat(abort.get(), equalTo(false));
        assertThat(sequence.get(), equalTo(runs * threads.length));
    }

    @Test
    public void testReuseConcurrentMultithreading() throws IOException, InterruptedException {
        // Create the analyzer
        final ComboAnalyzer cb = new ComboAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
        final NamedAnalyzer namedAnalyzer = new NamedAnalyzer("name", AnalyzerScope.INDEX, cb);
        // Use N threads, each running M times
        Thread[] threads = new Thread[4];
        final int runs = 4000; // leave time for threads to run concurrently
        // This integer ensures each thread runs with a different input
        // Inputs must not be exchanged from one thread to another during object reuse
        final AtomicInteger sequence = new AtomicInteger(0);
        // The barrier ensures that each thread gets a chance to execute, for each run
        final CyclicBarrier latch = new CyclicBarrier(threads.length);
        // Code executed on each thread
        Runnable code = new Runnable() {
            @Override
            public void run() {
                try {
                    latch.await();
                    // Run multiple times before quitting
                    for (int run = 0 ; run < runs ; ++run) {
                        // Get unique sequence number
                        int i = sequence.getAndIncrement();
                        // Check the analysis went well, including the unique sequence number
                        assertTokenStreamContents(namedAnalyzer.tokenStream("field", new StringReader("just a little test " + i)),
                                new String[]{"just", "a", "little", "test", Integer.toString(i)},
                                new int[]{0, 5, 7, 14, 19},
                                new int[]{4, 6, 13, 18, 19 + ("" + i).length()},
                                new int[]{1, 1, 1, 1, 1});
                    }
                } catch (Exception e) {
                    // Fail!
                    assertNull(e);
                }
            }
        };
        // Create the threads
        for (int i = 0 ; i < threads.length ; i++)
            threads[i] = new Thread(code);
        // Start the threads
        for (int i = 0 ; i < threads.length ; i++)
            threads[i].start();
        // Wait for completion
        for (int i = 0 ; i < threads.length ; i++)
            threads[i].join();
        // Ensure all desired runs have been performed
        assertThat(sequence.get(), equalTo(runs * threads.length));
    }

}
TOP

Related Classes of org.apache.lucene.analysis.TestComboAnalyzer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.