Examples of org.carrot2.core.Controller

org.carrot2.core.Controller
A controller implementing the life cycle described in {@link IProcessingComponent}. Use {@link ControllerFactory} to obtain controllers with different characteristics, e.g. with or without pooling of {@link IProcessingComponent}, with or without caching of the processing results. If further customizations are needed, you can provide your own {@link IProcessingComponentManager} implementation.
Calls to {@link #process(Map,Class)} are thread-safe, although some care should be given to initialization. A controller instance should be initialized (using any of the {@link #init()} methods) before other threads are allowed to see it. {@link #dispose()} should be called after all threads leave {@link #process(Map,Class)} and {@link #process(Map,Object)}.

Notice for {@link IProcessingComponent} developers: if data caching is used, values of {@link Output} attributes produced by the components whose output is to be cached (e.g., the {@link Document} instances in case {@link IDocumentSource} output is cached) may be accessed concurrently and therefore must be thread-safe.
@see ControllerFactory

public class FetchAndSaveBingResponse
{
    public static void main(String [] args)
        throws Exception
    {
        final Controller controller = ControllerFactory.createSimple();
        try {
            String appid = System.getProperty(Bing3DocumentSource.SYSPROP_BING3_API);
            if (Strings.isNullOrEmpty(appid))
            {
                System.err.println("Provide Bing3 API key in property: " 
                    + Bing3DocumentSource.SYSPROP_BING3_API);
            }


            final Map<String, Object> attributes = new HashMap<String, Object>();
            CommonAttributesDescriptor.attributeBuilder(attributes)
                .query("डाटा माइनिंग")
                .results(200);


            /* Put your own API key here or in a system property! */
            Bing3WebDocumentSourceDescriptor.attributeBuilder(attributes)
                .appid(appid)
                .market((MarketOption) null);


            ProcessingResult result = controller.process(attributes, Bing3WebDocumentSource.class);
            Persister p = new Persister();
            p.write(result, new File("result.xml"));
        } finally {
            controller.dispose();
        }        
    }

View Full Code Here

public class SavingResultsToJson
{
    public static void main(String [] args) throws Exception
    {
        // Let's cluster some sample documents first (no search engine is queried here)
        final Controller controller = ControllerFactory.createSimple();
        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .documents(new ArrayList<Document>(SampleDocumentData.DOCUMENTS_DATA_MINING))
            .query("data mining");


        final ProcessingResult result = controller.process(attributes,
            LingoClusteringAlgorithm.class);


        // Now, we can serialize the entire result to JSON like this
        result.serializeJson(new PrintWriter(System.out));
        System.out.println();

View Full Code Here


            final Map<String, Object> fasterClusteringAttributes = attributeValueSets
                .getAttributeValueSet("faster-clustering").getAttributeValues();


            // Perform processing using the attribute values
            final Controller controller = ControllerFactory.createSimple();


            // Initialize the controller with one attribute set
            controller.init(fasterClusteringAttributes);


            // Perform clustering using the attribute set provided at initialization time
            Map<String, Object> requestAttributes = Maps.newHashMap(); 
            CommonAttributesDescriptor.attributeBuilder(requestAttributes)
                .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
                .query("data mining");
            ProcessingResult results = controller.process(requestAttributes, LingoClusteringAlgorithm.class);
            ConsoleFormatter.displayClusters(results.getClusters());


            // Perform clustering using some other attribute set, in this case the
            // one that is the default in the XML file.
            requestAttributes =
                CommonAttributesDescriptor.attributeBuilder(Maps.newHashMap(defaultAttributes))
                    .documents(Lists.newArrayList(SampleDocumentData.DOCUMENTS_DATA_MINING))
                    .query("data mining").map;


            results = controller.process(requestAttributes, LingoClusteringAlgorithm.class);
            ConsoleFormatter.displayClusters(results.getClusters());
        }
        finally
        {
            CloseableUtils.close(xmlStream);

View Full Code Here

public class SavingResultsToXml
{
    public static void main(String [] args) throws Exception
    {
        // Let's cluster some sample documents first (no search engine is queried here)
        final Controller controller = ControllerFactory.createSimple();
        final Map<String, Object> attributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(attributes)
            .documents(new ArrayList<Document>(SampleDocumentData.DOCUMENTS_DATA_MINING))
            .query("data mining");


        final ProcessingResult result = controller.process(attributes,
            LingoClusteringAlgorithm.class);


        // Now, we can serialize the entire result to XML like this
        result.serialize(System.out);
        System.out.println();

View Full Code Here

         * Create a caching controller that will reuse processing component instances, but
         * will not perform any caching of results produced by components. We will leave
         * caching of documents from Lucene index to Lucene and the operating system
         * caches.
         */
        final Controller controller = ControllerFactory.createPooling();


        /*
         * Prepare a map with component-specific attributes. Here, this map will contain
         * the index location and names of fields to be used to fetch document title and
         * summary.
         */
        final Map<String, Object> luceneGlobalAttributes = new HashMap<String, Object>();


        String indexPath = "put your index path here or pass as the first argument";
        if (args.length == 1)
        {
            indexPath = args[0];
        }


        LuceneDocumentSourceDescriptor
            .attributeBuilder(luceneGlobalAttributes)
            .directory(FSDirectory.open(new File(indexPath)));


        /*
         * Specify fields providing data inside your Lucene index.
         */
        SimpleFieldMapperDescriptor
            .attributeBuilder(luceneGlobalAttributes)
            .titleField("title")
            .contentField("snippet")
            .searchFields(Arrays.asList(new String [] {"titleField", "fullContent"}));


        /*
         * Initialize the controller passing the above attributes as component-specific
         * for Lucene. The global attributes map will be empty. Note that we've provided
         * an identifier for our specially-configured Lucene component, we'll need to use
         * this identifier when performing processing.
         */
        controller.init(new HashMap<String, Object>(),
            new ProcessingComponentConfiguration(LuceneDocumentSource.class, "lucene",
                luceneGlobalAttributes));


        /*
         * Perform processing.
         */
        String query = "mining";
        final Map<String, Object> processingAttributes = Maps.newHashMap();
        CommonAttributesDescriptor.attributeBuilder(processingAttributes)
            .query(query);


        /*
         * We need to refer to the Lucene component by its identifier we set during
         * initialization. As we've not assigned any identifier to the
         * LingoClusteringAlgorithm we want to use, we can use its fully qualified class name.
         */
        ProcessingResult process = controller.process(processingAttributes, "lucene",
            LingoClusteringAlgorithm.class.getName());
        
        ConsoleFormatter.displayResults(process);
    }

View Full Code Here

         * Create the caching controller. You need only one caching controller instance
         * per application life cycle. This controller instance will cache the results
         * fetched from any document source and also clusters generated by the Lingo
         * algorithm.
         */
        final Controller controller = ControllerFactory.createCachingPooling(
            IDocumentSource.class, LingoClusteringAlgorithm.class);


        /*
         * Before using the caching controller, you must initialize it. On initialization,
         * you can set default values for some attributes. In this example, we'll set the
         * default results number to 50 and the API key.
         */
        final Map<String, Object> globalAttributes = new HashMap<String, Object>();
        CommonAttributesDescriptor
            .attributeBuilder(globalAttributes)
                .results(50);
        Bing3WebDocumentSourceDescriptor
            .attributeBuilder(globalAttributes)
                .appid(BingKeyAccess.getKey()); // use your own ID here
        controller.init(globalAttributes);


        /*
         * The controller is now ready to perform queries. To show that the documents from
         * the document input are cached, we will perform the same query twice and measure
         * the time for each query.
         */
        ProcessingResult result;
        long start, duration;


        final Map<String, Object> attributes;
        attributes = new HashMap<String, Object>();
        CommonAttributesDescriptor.attributeBuilder(attributes).query("data mining");


        start = System.currentTimeMillis();
        result = controller.process(attributes, Bing3WebDocumentSource.class,
            LingoClusteringAlgorithm.class);
        duration = System.currentTimeMillis() - start;
        System.out.println(duration + " ms (empty cache)");


        start = System.currentTimeMillis();
        result = controller.process(attributes, Bing3WebDocumentSource.class,
            LingoClusteringAlgorithm.class);
        duration = System.currentTimeMillis() - start;
        System.out.println(duration + " ms (documents and clusters from cache)");
        // [[[end:using-caching-controller]]]
    }

View Full Code Here

    public static void main(String [] args)
    {
        /*
         * Create a pooling controller (reuses components).
         */
        final Controller controller = ControllerFactory.createPooling();
        try {
            final Map<String, Object> params = new HashMap<String, Object>();
    
            /*
             * Add attributes relevant to the source and algorithm we will be
             * using. Note the builder classes are generated by annotation
             * processor (which must be in the compiler's classpath!).
             */
            /*
             * An alternative strategy is to put relevant attribute keys in the 
             * map directly but it can be tedious.
             */
            ModuloDocumentSourceDescriptor.attributeBuilder(params)
                .query("dummy")
                .results(10)
                .documents(SampleDocumentData.DOCUMENTS_DATA_MINING)
                .modulo(2)
                .analyzer(new WhitespaceAnalyzer(Version.LUCENE_CURRENT));


            ByFirstTitleLetterClusteringAlgorithmDescriptor.attributeBuilder(params)
                .caseSensitive(false);


            /*
             * Invoke processing on the controller and display the result. 
             */
            final ProcessingResult result = controller.process(params,
                ModuloDocumentSource.class, 
                ByFirstTitleLetterClusteringAlgorithm.class);


            ConsoleFormatter.displayResults(result);
        } finally {
            controller.dispose();
        }
    }

View Full Code Here

 */
public class UsingCustomLanguageModel
{
    public static void main(String [] args)
    {
        @SuppressWarnings("unchecked")
        final Controller controller = ControllerFactory
            .createCachingPooling(IDocumentSource.class);


        // We will pass our custom language model element factory classes as
        // initialization-time attributes. This is preferred to passing them as
        // processing-time attributes because the instances created at initialization
        // time will be reused for all further requests.
        Map<String, Object> attrs = Maps.newHashMap();
        BasicPreprocessingPipelineDescriptor.attributeBuilder(attrs)
            .stemmerFactory(CustomStemmerFactory.class)
            .tokenizerFactory(CustomTokenizerFactory.class)
            .lexicalDataFactory(CustomLexicalDataFactory.class);
        controller.init(attrs);


        // Cluster some data with Lingo and STC. Notice how the cluster quality degrades
        // when the stop word list is empty (especially for STC).
        clusterAndDisplayClusters(controller, LingoClusteringAlgorithm.class);
        clusterAndDisplayClusters(controller, STCClusteringAlgorithm.class);

View Full Code Here

        final ILexicalData lexicalData1;
        final ILexicalData lexicalData2;


        // Use ctrl1
        {
            final Controller ctrl = ControllerFactory.createPooling();
            final ProcessingResult result = ctrl.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);


            lexicalData1 = result.getAttribute("english");
        }


        // Use ctrl2 (a second, independently created controller)
        {
            final Controller ctrl = ControllerFactory.createPooling();
            final ProcessingResult result = ctrl.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);


            lexicalData2 = result.getAttribute("english");
        }

View Full Code Here

            DefaultLexicalDataFactory.class, "reloadResources");


        final IResourceLocator classpathLocator = Location.CONTEXT_CLASS_LOADER.locator;


        // Create pooling controller, use tempDir1
        final Controller ctrl1 = ControllerFactory.createPooling();
        final ILexicalData data1;
        {
            ctrl1.init(ImmutableMap.<String, Object> of(
                resourceLookupKey, 
                new ResourceLookup(new DirLocator(tempDir1), classpathLocator)));


            final ProcessingResult result = ctrl1.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);


            data1 = result.getAttribute("english");
            assertTrue(data1.isCommonWord(new MutableCharArray("uniquea")));
        }


        // Create another pooling controller, same folder, but different resource lookup.
        final Controller ctrl2 = ControllerFactory.createPooling();
        final ILexicalData data2;
        {
            ctrl2.init(ImmutableMap.<String, Object> of(
                resourceLookupKey, 
                new ResourceLookup(new DirLocator(tempDir1), classpathLocator)));


            final ProcessingResult result = ctrl2.process(
                Collections.<String, Object> emptyMap(), TestComponent.class);


            data2 = result.getAttribute("english");
            assertTrue(data2.isCommonWord(new MutableCharArray("uniquea")));


            assertSame(data1, data2);
        }


        /*
         * Now force reloading of resources from that path on ctrl1. The new stop word resource
         * should contain 'uniqueb'.
         */
        FileUtils.writeStringToFile(new File(tempDir1, "stopwords.en"), "uniqueb");


        final ILexicalData data3 = ctrl1.process(
            ImmutableMap.<String, Object> of(reloadResourcesKey, true), TestComponent.class)
                .getAttribute("english");


        assertNotSame(data1, data3);
        assertFalse(data3.isCommonWord(new MutableCharArray("uniquea")));
        assertTrue(data3.isCommonWord(new MutableCharArray("uniqueb")));


        /*
         * But since it's the same location, all other controllers should now see updated resources
         * (and share the same lexical data).
         */
        final ILexicalData data4 = ctrl2.process(
            Collections.<String, Object> emptyMap(), TestComponent.class).getAttribute("english");


        assertSame(data3, data4);
    }

View Full Code Here

0 1 2 3 4 5 6 7 8 9

TOP

Related Classes of org.carrot2.core.Controller

com.carrotsearch.hppc.IntArrayList

com.carrotsearch.hppc.IntStack

com.tamingtext.carrot2.Carrot2ExampleTest

org.apache.http.message.BasicNameValuePair

org.apache.lucene.search.IndexSearcher

org.apache.mahout.math.matrix.DoubleMatrix2D

org.apache.velocity.VelocityContext

org.carrot2.cli.batch.BatchApp

org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm

org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithmTest

All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.