Package org.apache.cassandra.repair

Source Code of org.apache.cassandra.repair.Validator

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.repair;

import java.net.InetAddress;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.cassandra.concurrent.Stage;
import org.apache.cassandra.concurrent.StageManager;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.ColumnFamilyStore;
import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.compaction.AbstractCompactedRow;
import org.apache.cassandra.net.MessagingService;
import org.apache.cassandra.repair.messages.ValidationComplete;
import org.apache.cassandra.utils.FBUtilities;
import org.apache.cassandra.utils.MerkleTree;

/**
* Handles the building of a merkle tree for a column family.
*
* Lifecycle:
* 1. prepare() - Initialize tree with samples.
* 2. add() - 0 or more times, to add hashes to the tree.
* 3. complete() - Enqueues any operations that were blocked waiting for a valid tree.
*/
public class Validator implements Runnable
{
    private static final Logger logger = LoggerFactory.getLogger(Validator.class);

    public final RepairJobDesc desc;
    public final InetAddress initiator;
    public final MerkleTree tree;
    public final int gcBefore;

    // null when all rows with the min token have been consumed
    private transient long validated;
    private transient MerkleTree.TreeRange range;
    private transient MerkleTree.TreeRangeIterator ranges;
    private transient DecoratedKey lastKey;

    /**
     * Create Validator with default size of initial Merkle Tree.
     */
    public Validator(RepairJobDesc desc, InetAddress initiator, int gcBefore)
    {
        this(desc,
             initiator,
             // TODO: memory usage (maxsize) should either be tunable per
             // CF, globally, or as shared for all CFs in a cluster
             new MerkleTree(DatabaseDescriptor.getPartitioner(), desc.range, MerkleTree.RECOMMENDED_DEPTH, (int)Math.pow(2, 15)),
             gcBefore);
    }

    public Validator(RepairJobDesc desc, InetAddress initiator, MerkleTree tree, int gcBefore)
    {
        this.desc = desc;
        this.initiator = initiator;
        this.tree = tree;
        this.gcBefore = gcBefore;
        validated = 0;
        range = null;
        ranges = null;
    }

    public void prepare(ColumnFamilyStore cfs)
    {
        if (!tree.partitioner().preservesOrder())
        {
            // You can't beat an even tree distribution for md5
            tree.init();
        }
        else
        {
            List<DecoratedKey> keys = new ArrayList<>();
            for (DecoratedKey sample : cfs.keySamples(desc.range))
            {
                assert desc.range.contains(sample.token): "Token " + sample.token + " is not within range " + desc.range;
                keys.add(sample);
            }

            if (keys.isEmpty())
            {
                // use an even tree distribution
                tree.init();
            }
            else
            {
                int numkeys = keys.size();
                Random random = new Random();
                // sample the column family using random keys from the index
                while (true)
                {
                    DecoratedKey dk = keys.get(random.nextInt(numkeys));
                    if (!tree.split(dk.token))
                        break;
                }
            }
        }
        logger.debug("Prepared AEService tree of size {} for {}", tree.size(), desc);
        ranges = tree.invalids();
    }

    /**
     * Called (in order) for every row present in the CF.
     * Hashes the row, and adds it to the tree being built.
     *
     * @param row Row to add hash
     */
    public void add(AbstractCompactedRow row)
    {
        assert desc.range.contains(row.key.token) : row.key.token + " is not contained in " + desc.range;
        assert lastKey == null || lastKey.compareTo(row.key) < 0
               : "row " + row.key + " received out of order wrt " + lastKey;
        lastKey = row.key;

        if (range == null)
            range = ranges.next();

        // generate new ranges as long as case 1 is true
        while (!range.contains(row.key.token))
        {
            // add the empty hash, and move to the next range
            range.ensureHashInitialised();
            range = ranges.next();
        }

        // case 3 must be true: mix in the hashed row
        range.addHash(rowHash(row));
    }

    static class CountingDigest extends MessageDigest
    {
        private long count;
        private MessageDigest underlying;

        public CountingDigest(MessageDigest underlying)
        {
            super(underlying.getAlgorithm());
            this.underlying = underlying;
        }

        @Override
        protected void engineUpdate(byte input)
        {
            underlying.update(input);
            count += 1;
        }

        @Override
        protected void engineUpdate(byte[] input, int offset, int len)
        {
            underlying.update(input, offset, len);
            count += len;
        }

        @Override
        protected byte[] engineDigest()
        {
            return underlying.digest();
        }

        @Override
        protected void engineReset()
        {
            underlying.reset();
        }

    }

    private MerkleTree.RowHash rowHash(AbstractCompactedRow row)
    {
        validated++;
        // MerkleTree uses XOR internally, so we want lots of output bits here
        CountingDigest digest = new CountingDigest(FBUtilities.newMessageDigest("SHA-256"));
        row.update(digest);
        return new MerkleTree.RowHash(row.key.token, digest.digest(), digest.count);
    }

    /**
     * Registers the newly created tree for rendezvous in Stage.ANTIENTROPY.
     */
    public void complete()
    {
        completeTree();

        StageManager.getStage(Stage.ANTI_ENTROPY).execute(this);

        if (logger.isDebugEnabled())
        {
            // log distribution of rows in tree
            logger.debug("Validated {} partitions for {}.  Partitions per leaf are:", validated, desc.sessionId);
            tree.histogramOfRowCountPerLeaf().log(logger);
            logger.debug("Validated {} partitions for {}.  Partition sizes are:", validated, desc.sessionId);
            tree.histogramOfRowSizePerLeaf().log(logger);
        }
    }

    @VisibleForTesting
    public void completeTree()
    {
        assert ranges != null : "Validator was not prepared()";

        if (range != null)
            range.ensureHashInitialised();
        while (ranges.hasNext())
        {
            range = ranges.next();
            range.ensureHashInitialised();
        }
    }

    /**
     * Called when some error during the validation happened.
     * This sends RepairStatus to inform the initiator that the validation has failed.
     * The actual reason for failure should be looked up in the log of the host calling this function.
     */
    public void fail()
    {
        logger.error("Failed creating a merkle tree for {}, {} (see log for details)", desc, initiator);
        // send fail message only to nodes >= version 2.0
        MessagingService.instance().sendOneWay(new ValidationComplete(desc).createMessage(), initiator);
    }

    /**
     * Called after the validation lifecycle to respond with the now valid tree. Runs in Stage.ANTIENTROPY.
     */
    public void run()
    {
        // respond to the request that triggered this validation
        if (!initiator.equals(FBUtilities.getBroadcastAddress()))
            logger.info(String.format("[repair #%s] Sending completed merkle tree to %s for %s/%s", desc.sessionId, initiator, desc.keyspace, desc.columnFamily));
        MessagingService.instance().sendOneWay(new ValidationComplete(desc, tree).createMessage(), initiator);
    }
}
TOP

Related Classes of org.apache.cassandra.repair.Validator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.