Package edu.ucla.sspace.hadoop

Examples of edu.ucla.sspace.hadoop.WordCooccurrence


        int wordCount =  0;
        // Local state variables for updating the current word's vector.
        String curWord = null;
        IntegerVector semantics = null;
        while (occurrences.hasNext()) {
            WordCooccurrence occ = occurrences.next();
            String word = occ.focusWord();
            // Base case for the first word seen
            if (curWord == null) {
                curWord = word;
                semantics = createSemanticVector();
            }
            // If we've seen a new word, write the previous word's vector
            else if (!curWord.equals(word)) {
                writer.write(curWord, semantics);
                if (LOGGER.isLoggable(Level.FINE)) {
                    LOGGER.fine(String.format(
                        "processed word #%d: %s%n ", ++wordCount, curWord));
                }
                curWord = word;
                semantics = createSemanticVector();
            }

            // Check if this is the last word, if yes then write
            if(!occurrences.hasNext()) {
                writer.write(curWord, semantics);
            }

            // NOTE: because we are using a GeneratorMap, this call will create
            // a new index vector for the word if it didn't exist prior.
            TernaryVector indexVector =
                wordToIndexVector.get(occ.relativeWord());

            if (usePermutations) {
                indexVector =
                    permutationFunc.permute(indexVector, occ.getDistance());
            }
            // Scale the index vector by the number of times this occurrence
            // happened
            VectorMath.addWithScalars(
                semantics, 1, indexVector, occ.getCount());
        }
        writer.close();
    }
View Full Code Here

TOP

Related Classes of edu.ucla.sspace.hadoop.WordCooccurrence

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.