Package org.carrot2.core

Examples of org.carrot2.core.Document


        return Lists.newArrayList(set);
    }

    protected Document documentWithPartitions(final String... partitions)
    {
        final Document document = new Document();
        document.setField(Document.PARTITIONS, Arrays.asList(partitions));
        return document;
    }
View Full Code Here


        };
    }

    protected Cluster [] overlappingClustersWithHardPartitions()
    {
        final Document document = documentWithPartitions("t2");
        final Cluster c1 = new Cluster("c1", documentWithPartitions("t1"),
            documentWithPartitions("t1"), document);
        final Cluster c2 = new Cluster("c2", document, documentWithPartitions("t2"));
        return new Cluster []
        {
View Full Code Here

        };
    }

    protected Cluster [] overlappingClustersWithOverlappingPartitions()
    {
        final Document documentWithTwoPartitions = documentWithPartitions("t1", "t2");
        final Cluster c1 = new Cluster("c1", documentWithPartitions("t1"),
            documentWithPartitions("t1"), documentWithTwoPartitions);
        final Cluster c2 = new Cluster("c2", documentWithTwoPartitions,
            documentWithPartitions("t2"));
        return new Cluster []
View Full Code Here

    @Test
    public void smokeTest()
    {
        final List<Document> documents = Lists.newArrayList();
        documents.add(new Document("WordA . WordA"));
        documents.add(new Document("WordB . WordB"));
        documents.add(new Document("WordC . WordC"));
        documents.add(new Document("WordA . WordA"));
        documents.add(new Document("WordB . WordB"));
        documents.add(new Document("WordC . WordC"));

        BisectingKMeansClusteringAlgorithmDescriptor.attributeBuilder(
            processingAttributes).labelCount(1).partitionCount(3);
        final List<Cluster> clusters = cluster(documents).getClusters();
View Full Code Here

        response.metadata.put(SearchEngineResponse.RESULTS_TOTAL_KEY, docs.totalHits);

        for (ScoreDoc scoreDoc : docs.scoreDocs)
        {
            final Document doc = new Document();
            final org.apache.lucene.document.Document luceneDoc = searcher
                .doc(scoreDoc.doc);

            // Set score before mapping to give the mapper a chance to override it
            doc.setScore((double) scoreDoc.score);

            if (keepLuceneDocuments)
            {
                doc.setField(LUCENE_DOCUMENT_FIELD, luceneDoc);
                doc.addSerializationListener(removeLuceneDocument);
            }

            this.fieldMapper.map((Query) query, analyzer, luceneDoc, doc);
            response.results.add(doc);
        }
View Full Code Here

    @Test
    public void testNoLabelCandidates()
    {
        final List<Document> documents = Lists.newArrayList();
        documents.add(new Document("test"));
        documents.add(new Document("test"));
        documents.add(new Document("test"));
        processingAttributes.put(AttributeNames.QUERY, "test");

        final List<Cluster> clusters = cluster(documents).getClusters();

        assertNotNull(clusters);
View Full Code Here

    }

    @Test
    public void testStemmingUsedWithDefaultAttributes()
    {
        final List<Document> documents = ImmutableList.of(new Document("program"),
            new Document("programs"), new Document("programming"),
            new Document("program"), new Document("programs"),
            new Document("programming"), new Document("other"));

        final List<Cluster> clusters = cluster(documents).getClusters();
        assertThat(clusters).hasSize(2);
        assertThat(clusters.get(0).getLabel().toLowerCase()).startsWith("program");
    }
View Full Code Here

    @Test
    public void testMergingBaseClustersWithStemEquivalentPhrases()
    {
        List<Document> documents = Lists.newArrayList();
        documents.add(new Document("good programs . foo1"));
        documents.add(new Document("foo2 good programs . foo2"));
        documents.add(new Document("good programs taste good"));
        documents.add(new Document("good programs are good"));

        documents.add(new Document("good programming . foo3"));
        documents.add(new Document("foo4 good programming . foo4"));
        documents.add(new Document("good programming makes you feel better"));

        // Lower base cluster score.
        STCClusteringAlgorithmDescriptor.attributeBuilder(processingAttributes)
            .minBaseClusterScore(0);
View Full Code Here

        final ITokenizer ts = context.language.getTokenizer();
        final MutableCharArray wrapper = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);

        while (docIterator.hasNext())
        {
            final Document doc = docIterator.next();

            boolean hadTokens = false;
            for (int i = 0; i < fieldNames.length; i++)
            {
                final byte fieldIndex = (byte) i;
                final String fieldName = fieldNames[i];
                final String fieldValue = doc.getField(fieldName);

                if (!StringUtils.isEmpty(fieldValue))
                {
                    try
                    {
View Full Code Here

    {
        ArrayList<Document> smallDocs = MemTimeBenchmark.documents;
        ArrayList<Document> largeDocs = new ArrayList<Document>();

        final int MERGE_DOCS = 5;
        Document last = null;
        for (int i = 0; i < smallDocs.size(); i++)
        {
            if ((i % MERGE_DOCS) == 0)
            {
                largeDocs.add(last = new Document());
            }
           
            Document d = smallDocs.get(i);
            last.setTitle(join(last.getTitle(), d.getTitle()));
            last.setSummary(join(last.getSummary(), d.getSummary()));
            if (last.getContentUrl() == null)
                last.setContentUrl(d.getContentUrl());
        }
       
        MemTimeBenchmark.documents = largeDocs;
    }
View Full Code Here

TOP

Related Classes of org.carrot2.core.Document

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.