架构师训练营第五周 - 作业一

作业一(2 选 1):
1.用你熟悉的编程语言实现一致性 hash 算法。
2.编写测试用例测试这个算法,测试 100 万 KV 数据,10 个服务器节点的情况下,计算这些 KV 数据在服务器上分布数量的标准差,以评估算法的存储负载不均衡性。

作业二:根据当周学习情况,完成一篇学习总结

作业一

https://github.com/chenlongqiang/lukachen-study/blob/main/u.geekbang/%E6%9E%B6%E6%9E%84%E5%B8%88%E8%AE%AD%E7%BB%83%E8%90%A5/%E7%AC%AC%E4%BA%94%E5%91%A8/HashRing.php

<?php

/**
 * 一致性哈希算法
 */
class HashRing
{
    private $clusterMachine = []; 
    private $clusterConfig = []; 
    
    public function __construct()
    {
        $this->init();
    }

    public function init()
    {
        // 集群机器列表
        $this->clusterMachine = [
            'node1' => '192.168.0.1',
            'node2' => '192.168.0.2',
            'node3' => '192.168.0.3',
            'node4' => '192.168.0.4',
            'node5' => '192.168.0.5',
            'node6' => '192.168.0.6',
            'node7' => '192.168.0.7',
            'node8' => '192.168.0.8',
            'node9' => '192.168.0.9',
            'node10' => '192.168.0.10',
        ];

        // 集群机器 hash 节点 (添加虚拟节点),按小到大排好序
        $virtualNum = 150; // 每个节点虚拟出的节点数
        foreach ($this->clusterMachine as $nodeNumber => $nodeIp) {
            for ($i = 1; $i <= $virtualNum; $i ++) {
                $nodeName = $nodeNumber . '_' . $i;
                $this->clusterConfig[$nodeName] = [
                    'node_ip' => $nodeIp,
                    'node_hash_value' => $this->getHashValue($nodeName),
                ];
            }
        }
        array_multisort(array_column($this->clusterConfig, 'node_hash_value'), SORT_ASC, $this->clusterConfig);
    }

    public function printRing()
    {
        // 先取出后一个节点
        end($this->clusterConfig);
        $prevNodeInfo = current($this->clusterConfig);
        reset($this->clusterConfig);

        foreach ($this->clusterConfig as $nodeName => $nodeInfo) {
            if ($nodeInfo['node_hash_value'] < $prevNodeInfo['node_hash_value']) {
                $betweenPrev = pow(2, 32) - $prevNodeInfo['node_hash_value'] + $nodeInfo['node_hash_value'];
            } else {
                $betweenPrev = $nodeInfo['node_hash_value'] - $prevNodeInfo['node_hash_value'];
            }
            echo "nodeName: " . $nodeName . PHP_EOL;
            echo "nodeIp: " . $nodeInfo['node_ip'] . PHP_EOL;
            echo "nodeHashValue: " . $nodeInfo['node_hash_value'] . PHP_EOL;
            echo "betweenPrev: " . $betweenPrev . PHP_EOL; // 和上一个节点的间距
            echo PHP_EOL;
            $prevNodeInfo = $nodeInfo;
        }

        // todo 计算 标准差、方差 来衡量算法分布均匀程度
    }

    public function getNode($key = '')
    {
        $keyHashValue = $this->getHashValue($key);
        
        $firstNodeName = key($this->clusterConfig);
        foreach ($this->clusterConfig as $nodeName => $nodeInfo) {
            if ($nodeInfo['node_hash_value'] >= $keyHashValue) {
                // echo $nodeName . PHP_EOL;
                return $nodeName;
            }
        }
        // echo $firstNodeName . PHP_EOL;
        return $firstNodeName;
    }

    public function getHashValue($key)
    {
        return sprintf("%u", crc32(md5($key))) % pow(2, 32);
    }

}

// 打印哈希环分布
// (new HashRing())->printRing();

// 打印 1 到 10000 在节点上分布
$hashRing = (new HashRing());
foreach (range(1,10000) as $n) {
    $nodeName = $hashRing->getNode($n);
    $nodeName = explode('_', $nodeName)[0];
    if (isset($result[$nodeName])) {
        $result[$nodeName] = $result[$nodeName] + 1;
    } else {
        $result[$nodeName] = 1;
    }
}
echo '<pre>';
print_r($result);

// 打印结果
/*
Array
(
    [node2] => 819
    [node9] => 954
    [node10] => 950
    [node1] => 1103
    [node8] => 927
    [node7] => 1023
    [node3] => 1042
    [node6] => 1260
    [node5] => 1041
    [node4] => 881
)
*/

发表新评论