Examples of DoubleWritable
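
org.apache.hadoop.io.DoubleWritable is Hadoop's Writable box for a single double: it exposes get() and set(double) and serializes through write(DataOutput) and readFields(DataInput). Before the project snippets below, here is a minimal self-contained round trip; the class name is ours, for illustration only:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.io.DoubleWritable;

public class DoubleWritableRoundTrip {
  public static void main(String[] args) throws Exception {
    DoubleWritable out = new DoubleWritable(Math.log(2.0)); // box a double value
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    out.write(new DataOutputStream(bytes));                 // standard Writable serialization

    DoubleWritable in = new DoubleWritable();
    in.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    System.out.println(in.get());                           // prints 0.6931471805599453
  }
}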


Examples of org.apache.hadoop.io.DoubleWritable

      } else if (key.stringAt(0).equals(BayesConstants.DOCUMENT_FREQUENCY)) {
        String label = key.stringAt(1);
        Double labelDocumentCount = labelDocumentCounts.get(label);
        // value holds the term's document frequency for this label, so this is
        // the classic log(N / df) inverse document frequency.
        double logIdf = Math.log(labelDocumentCount / value.get());
        key.replaceAt(0, BayesConstants.WEIGHT);
        output.collect(key, new DoubleWritable(logIdf));
        reporter.setStatus("Bayes TfIdf Mapper: log(Idf): " + key);
      } else {
        throw new IllegalArgumentException("Unrecognized Tuple: " + key);
      }
    } else if (key.length() == 2) {
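
To make the arithmetic in that fragment concrete: if a label covers 100 documents and the term being weighted appears in 10 of them, the mapper emits log(100 / 10) ≈ 2.3026. A tiny standalone check; the counts are invented for illustration:

public class LogIdfCheck {
  public static void main(String[] args) {
    double labelDocumentCount = 100.0; // documents carrying this label
    double documentFrequency = 10.0;   // of those, documents containing the term
    System.out.println(Math.log(labelDocumentCount / documentFrequency)); // prints 2.302585092994046
  }
}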

Examples of org.apache.hadoop.io.DoubleWritable

     
      StringTuple outputTuple = new StringTuple(BayesConstants.CLASSIFIER_TUPLE);
      outputTuple.add(correctLabel);
      outputTuple.add(classifiedLabel);

      // Emit a count of 1 for this (correct label, classified label) pair.
      output.collect(outputTuple, new DoubleWritable(1.0));
    } catch (InvalidDatastoreException e) {
      throw new IOException(e.toString());
    }
  }

Examples of org.apache.hadoop.io.DoubleWritable

          TupleWritable cachedTuple = new TupleWritable(3);
          int simItem = curTuple.getInt(1);
          double score = curTuple.getDouble(2);
          cachedTuple.set(0, new VIntWritable(seenItem));
          cachedTuple.set(1, new VIntWritable(simItem));
          cachedTuple.set(2, new DoubleWritable(score));
          cachedSimilarItems.add(cachedTuple);
        } else {
          // Encountered a tuple from the 'user' relation (ID = 1): do the join.
          int userId = curTuple.getInt(1);
          user.set(userId);
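
The fragment above is the buffering half of a reduce-side join: tuples from the item-similarity relation are cached, and each tuple from the 'user' relation is then joined against that cache. A stripped-down sketch of the pattern, using a hypothetical Tuple type rather than the project's TupleWritable:

import java.util.ArrayList;
import java.util.List;

public class ReduceSideJoinSketch {

  static final class Tuple {
    final int relationId; // 0 = item-similarity relation, 1 = user relation
    final int id;
    final double score;
    Tuple(int relationId, int id, double score) {
      this.relationId = relationId;
      this.id = id;
      this.score = score;
    }
  }

  // All tuples sharing one join key arrive together; similarity tuples sort first.
  static void joinOneKey(Iterable<Tuple> tuplesForKey) {
    List<Tuple> cachedSimilarItems = new ArrayList<Tuple>();
    for (Tuple t : tuplesForKey) {
      if (t.relationId == 0) {
        cachedSimilarItems.add(t); // buffer the similarity relation
      } else {
        for (Tuple sim : cachedSimilarItems) { // emit one joined row per cached tuple
          System.out.println("user " + t.id + " -> item " + sim.id + " score " + sim.score);
        }
      }
    }
  }
}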

Examples of org.apache.hadoop.io.DoubleWritable

   
    double[] logTotals = new double[state.numTopics];
    Arrays.fill(logTotals, Double.NEGATIVE_INFINITY);
   
    // Output the sufficient statistics for each word; these are pseudo-log-counts.
    DoubleWritable v = new DoubleWritable();
    for (Iterator<Vector.Element> iter = wordCounts.iterateNonZero(); iter.hasNext();) {
      Vector.Element e = iter.next();
      int w = e.index();
     
      for (int k = 0; k < state.numTopics; ++k) {
        v.set(doc.phi(k, w) + Math.log(e.get()));
       
        IntPairWritable kw = new IntPairWritable(k, w);
       
        // output this (topic, word) pair's log-probability contribution
        context.write(kw, v);
        logTotals[k] = LDAUtil.logSum(logTotals[k], v.get());
      }
    }
   
    // Output the per-topic totals for the statistics; this makes
    // normalization a lot easier.
    for (int k = 0; k < state.numTopics; ++k) {
      IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
      v.set(logTotals[k]);
      assert !Double.isNaN(v.get());
      context.write(kw, v);
    }
    IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
    // Output log-likelihoods.
    v.set(doc.logLikelihood);
    context.write(llk, v);
  }

Examples of org.apache.hadoop.io.DoubleWritable

        if (Double.isNaN(v)) {
          throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
        }
        accum += v;
      }
      context.write(topicWord, new DoubleWritable(accum));
    } else { // log-sum the sufficient statistics.
      double accum = Double.NEGATIVE_INFINITY;
      for (DoubleWritable vw : values) {
        double v = vw.get();
        if (Double.isNaN(v)) {
          throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
        }
        accum = LDAUtil.logSum(accum, v);
        if (Double.isNaN(accum)) {
          throw new IllegalArgumentException(topicWord.getFirst() + " " + topicWord.getSecond());
        }
      }
      context.write(topicWord, new DoubleWritable(accum));
    }
   
  }
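
Both LDA snippets above lean on LDAUtil.logSum, which computes log(e^a + e^b) without leaving log space, so large-magnitude log values never overflow when exponentiated. A minimal standalone sketch of such a helper; this is our own version for illustration, not necessarily Mahout's exact implementation:

public final class LogSumSketch {

  private LogSumSketch() { }

  // Numerically stable log(exp(a) + exp(b)); Double.NEGATIVE_INFINITY plays
  // the role of log(0).
  public static double logSum(double a, double b) {
    if (a == Double.NEGATIVE_INFINITY) {
      return b;
    }
    if (b == Double.NEGATIVE_INFINITY) {
      return a;
    }
    double max = Math.max(a, b);
    double min = Math.min(a, b);
    return max + Math.log1p(Math.exp(min - max)); // exponent is <= 0, so exp cannot overflow
  }
}

Starting accum (and logTotals in the mapper) at Double.NEGATIVE_INFINITY is therefore the log-space equivalent of initializing a sum to zero.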

Examples of org.apache.hadoop.io.DoubleWritable

  private static void writeInitialState(String statePath, int numTopics, int numWords) throws IOException {
    Path dir = new Path(statePath);
    Configuration job = new Configuration();
    FileSystem fs = dir.getFileSystem(job);
   
    DoubleWritable v = new DoubleWritable();
   
    Random random = RandomUtils.getRandom();
   
    for (int k = 0; k < numTopics; ++k) {
      Path path = new Path(dir, "part-" + k);
      SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, IntPairWritable.class,
          DoubleWritable.class);
     
      double total = 0.0; // running total of the pseudo-counts we generate
      for (int w = 0; w < numWords; ++w) {
        IntPairWritable kw = new IntPairWritable(k, w);
        // A small random pseudo-count, floored at 1.0E-8 so its log stays finite.
        double pseudocount = random.nextDouble() + 1.0E-8;
        total += pseudocount;
        v.set(Math.log(pseudocount));
        writer.append(kw, v);
      }
      IntPairWritable kTsk = new IntPairWritable(k, TOPIC_SUM_KEY);
      v.set(Math.log(total));
      writer.append(kTsk, v);
     
      writer.close();
    }
  }

Examples of org.apache.hadoop.io.DoubleWritable

    FileSystem fs = dir.getFileSystem(job);
   
    double ll = 0.0;
   
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        if (key.getFirst() == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
          break;
        }
      }
      reader.close();
    }

Examples of org.apache.hadoop.io.DoubleWritable

    DenseMatrix pWgT = new DenseMatrix(numTopics, numWords);
    double[] logTotals = new double[numTopics];
    double ll = 0.0;
   
    IntPairWritable key = new IntPairWritable();
    DoubleWritable value = new DoubleWritable();
    for (FileStatus status : fs.globStatus(new Path(dir, "part-*"))) {
      Path path = status.getPath();
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, job);
      while (reader.next(key, value)) {
        int topic = key.getFirst();
        int word = key.getSecond();
        if (word == TOPIC_SUM_KEY) {
          logTotals[topic] = value.get();
          if (Double.isInfinite(value.get())) {
            throw new IllegalArgumentException();
          }
        } else if (topic == LOG_LIKELIHOOD_KEY) {
          ll = value.get();
        } else {
          if (!((topic >= 0) && (word >= 0))) {
            throw new IllegalArgumentException(topic + " " + word);
          }
          if (pWgT.getQuick(topic, word) != 0.0) {
            throw new IllegalArgumentException();
          }
          pWgT.setQuick(topic, word, value.get());
          if (Double.isInfinite(pWgT.getQuick(topic, word))) {
            throw new IllegalArgumentException();
          }
        }
      }

Examples of org.apache.hadoop.io.DoubleWritable

  }

  @Test
  public void testDoubles() throws Exception {
    double j = 55.5d;
    DoubleWritable w = new DoubleWritable(j);
    testInputOutputFn(Writables.doubles(), j, w);
  }
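
The test verifies that Crunch's Writables.doubles() PType maps a Java Double to a Hadoop DoubleWritable and back. A sketch of how that PType is used in a pipeline; the parsing MapFn and the file paths are invented for illustration:

import org.apache.crunch.MapFn;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.impl.mr.MRPipeline;
import org.apache.crunch.types.writable.Writables;

public class ParseDoubles {
  public static void main(String[] args) {
    Pipeline pipeline = new MRPipeline(ParseDoubles.class);
    PCollection<String> lines = pipeline.readTextFile("input/numbers.txt");
    // Writables.doubles() tells Crunch to shuttle these values as DoubleWritable.
    PCollection<Double> values = lines.parallelDo(new MapFn<String, Double>() {
      @Override
      public Double map(String line) {
        return Double.parseDouble(line);
      }
    }, Writables.doubles());
    pipeline.writeTextFile(values, "output/doubles");
    pipeline.done();
  }
}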

Examples of org.apache.hadoop.io.DoubleWritable

      intnum5 = new IntWritable();
      intnum100 = new IntWritable();
      intnum = new IntWritable();
      longnum = new LongWritable();
      floatnum = new FloatWritable();
      doublenum = new DoubleWritable();
    }