Map及散列HashCode

Map数据结构在平时开发当中经常使用到，其中HashMap因为其查询和插入非常高效快速而更受开发者热捧。

在本文当中我们通过设计HashMap来

明白HashMap高效查找和插入的原因
我们自己的类作为HashMap的键需要注意什么？
hashCode及equals方法的作用及应用

接下来我们来分析Java中Map的设计及如果我们使用自己类作为Map的键需要考虑哪些条件呢？

场景一：我们自己实现Map类型：

Map的底层是关联数组，我们可以使用两个ArrayList（一个存储Key，一个存储Value）来设计Map，我们设计MyMap继承自AbstractMap，重写put,get,entrySet等常用方法。

如果我们自己实现Map类型，就需要同时定义Map.Entry接口的实现，这样才能实现Map接口的entrySet方法，返回Map结构的Entry节点。
我们自己实现Map类型代码如下：

import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class MyMap extends AbstractMap{
    private List keyList = new ArrayList();
    private List valueList = new ArrayList();
    

public V put(K key,V value){
    V oldval = get(key);//获取原先的值
    if(oldval != null){//原先的key在map存在就更新
        valueList.set(keyList.indexOf(key), value);
    }else{
        //原先的key在map不存在就插入
        keyList.add(key);
        valueList.add(value);
    }
    
    return oldval;
}

public V get(Object key){
    if(!keyList.contains(key)){
        return null;
    }
    return valueList.get(keyList.indexOf(key));
}

@Override
public Set> entrySet() {
    Set> set = new HashSet>();
    for (int i = 0;i < keyList.size();i++) {
        set.add(new MapEntry(keyList.get(i),valueList.get(i)));
    }
    return set;
}

public static void main(String[] args) {
    MyMap map = new MyMap();
    map.put("Huawei", 1000);
    map.put("Xiaomi", 400);
    map.put("OPPO", 500);
    map.put("VIVO", 500);
    map.put("Nokia", 100);
    map.put("APPLE", 1000);
    
    System.out.println(map.keySet());
    System.out.println(map.values());
    System.out.println(map.entrySet());
    System.out.println(map.get("APPLE"));
    
    /**
     * [OPPO, APPLE, Nokia, Huawei, Xiaomi, VIVO]
        [1000, 1000, 500, 500, 100, 400]
        [Huawei = 1000, OPPO = 500, Xiaomi = 400, APPLE = 1000, VIVO = 500, Nokia = 100]
        1000
     * */
    }
}
//实现Map.Entry接口产生自己的MapEntry
class MapEntry implements Map.Entry{
    private K k;
    private V v;
public MapEntry(K k, V v) {
    this.k = k;
    this.v = v;
}

@Override
public K getKey() {
    return k;
}

@Override
public V getValue() {
    return v;
}

@Override
public V setValue(V value) {
    V v1 = v;
    v = value;
    return v1;
}

public boolean equals(Object e1){
    if(!(e1 instanceof MapEntry)){
        return false;
    }
    MapEntry me = (MapEntry)e1;
    return (me.getKey() == null ? this.getKey() == null : me.getKey().equals(this.getKey()))
            && (me.getValue() == null ? this.getValue() == null : me.getValue().equals(this.getValue()));
}

public int  hashCode(){
    return(k==null ? 0 : k.hashCode()) ^ (v==null ? 0 : v.hashCode());
}
public String toString(){
    return k + " = "+ v;
}
}

那么虽然可以正常存储数据，但是数组的查询都是线性查找，效率非常低。那么HashMap的高效查询原因在于哪里？

场景二：我们自己的类要作为HashMap的键

接下来我们再看一个场景，假设我们有自己的类要作为HashMap的键，那需要注意什么？

import java.util.HashMap;

public class Mobile {
    private String band;
    private int price;
public Mobile(String band,int price){
    this.band = band;
    this.price=price;
}
public String getBand() {
    return band;
}
public int getPrice() {
    return price;
}
public String toString(){
    return band + " the price is " +price;
}

public static void main(String[] args) {
    HashMap mobiles = new HashMap<>();
    mobiles.put(new Mobile("Huawei",1000), 10);
    mobiles.put(new Mobile("OPPO",600), 40);
    mobiles.put(new Mobile("VIVO",800), 30);
    mobiles.put(new Mobile("Xiaomi",700), 20);
    mobiles.put(new Mobile("Huawei",1000), 20);//再次插入同样的对象但是还可以插入
    
    System.out.println(mobiles);
    System.out.println(mobiles.get(new Mobile("Huawei",1000)));//找不到插入的对象
    
    /*
     * {VIVO the price is 800=30, Huawei the price is 1000=20, Huawei the price is 1000=10, OPPO the price is 600=40, Xiaomi the price is 700=20}
        null
     * */
}
}

可以看出我们自己创建的类作为HashMap的键的时候，出现了问题，插入的数据找不到了。问题的关键在于我们没有实现类的hashcode及equals方法

hashCode与equals

HashMap其高效的查询是因为其底层的散列算法hashCode来快速定位数据，散列算法在其他数据结构如：LinkedHashMap,HashSet,LinkedHashSet也有使用

散列算法的查询过程

存储一组元素最快的数据结构是数组，使用一个数组保存键的信息，而不是键的本身，具体是将键的散列值（实现hashCode方法）作为数组的下标
虽然数组是固定存储大小的，但是我们允许不同的键通过散列产生相同的下标，这会产生冲突，而数组里面存储的是链表对象来解决冲突，可以存储具体的值；
综上，查询的过程是先将对键计算散列码，根据散列码跳到数组对应的下标，根据数组下标找到对应的链表list再遍历list（实现equals方法）找到相应的值

这就是HashMap如此高效查询的原因

设计hashCode方法的注意点：

设计hashCode的关键点在于无论何时，对同一个对象调用hashCode应该产生同样的值，不能在put的时候产生一个值，在get()的时候产生另外一个值，所以我们不能使用对象中容易改变的值来hashCode
hashCode()不应该使用对象具有唯一性的对象信息，尤其是使用this的值（对象的在堆中的内存地址），这样无法生成一个新的键，使之与put()原始的键值对应的键相同，应该使用对象具有识别意义的信息
一个好的hashCode算法能够产生均匀的散列码，将键信息均匀地分布在不同的数组下标里面，这样防止数组里某个下标的链表存在过多的元素导致使用equals遍历时间过长

在《Effective Java》对于好的hashCode有基本的指导意见，如下：

在这里插入图片描述

实用的hashCode()生成速度快并且有意义，这样可以快速定位对象，equals()则对比对象的基本特征是否一样，通过hashCode()及equals()可以完全确定对象的身份。

正确的equals()方法

自反性。对任意x，x.equals(x)一定返回true
对称性。对任意x和y，如果y.equals(x)返回true，则x.equals(y)也返回true
传递性。对任意x，y，z，如果x.equals(y)返回true，y.equals(z)返回true，则x.equals(z)也返回true
一致性。对任意x和y，如果对象中用于等价比较的信息没改变，则无论调用x.equals(y)多少次，返回的结果一定保持一致，要么一直为true，要么一直为false。
对任何不是null的x，x.equals(null)一定返回false

下面我们改进上面的Map类型及Mobile2

import java.util.HashMap;

public class Mobile2 {
    private String band;
    private int price;
    
    public Mobile2(String band,int price){
        this.band = band;
        this.price=price;
    }
    public String getBand() {
        return band;
    }
    public int getPrice() {
        return price;
    }
    public String toString(){
        return band + " the price is " +price;
    }
    
    //重写Object继承而来的hashCode方法，默认的Object的hashCode根据对象的内存地址来计算，实用的hashCode根据对象的内容来生成
    public int hashCode(){
        int result = 17;
        result = 37 * result + band.hashCode();
        result = result + price;
        return result;
    }
    
    //重写Object继承而来的equals方法，默认的Object的equals比较对象的内存地址，实用的equals比较对象的关键属性
    /*Object的equals方法：
     *public boolean equals(Object obj) {
        return (this == obj);
    }* */
    public boolean equals(Object obj){
        if(!(obj instanceof Mobile2))
            return false;
        
        Mobile2 m2 = (Mobile2)obj;
        return (m2.getBand() == null ? this.band == null : m2.getBand().equals(this.band))
                && (m2.getPrice() == 0 ? this.price == 0 : m2.getPrice() == this.price);
    }
    
    public static void main(String[] args) {
        HashMap mobiles = new HashMap<>();
        mobiles.put(new Mobile2("Huawei",1000), 10);
        mobiles.put(new Mobile2("OPPO",600), 40);
        mobiles.put(new Mobile2("VIVO",800), 30);
        mobiles.put(new Mobile2("Xiaomi",700), 20);
        mobiles.put(new Mobile2("Huawei",1000), 20);//再次插入同样的对象但是还可以插入
        
        System.out.println(mobiles);
        System.out.println(mobiles.get(new Mobile2("Huawei",1000)));//找不到插入的对象
        
        /*{Xiaomi the price is 700=20, OPPO the price is 600=40, VIVO the price is 800=30, Huawei the price is 1000=20}
            20
         * */
    }
}

改进的HashMap代码如下：

import java.util.AbstractMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;

public class MyHashMap extends AbstractMap{
    static final int SIZE = 997;
    
    //创建存放List的数组
    private LinkedList> [] buckets = new LinkedList [997];
    
    public V put(K key,V value){
        V oldVal = null ;
        
        //对键进行hashCode计算
        int index = Math.abs(key.hashCode())  % SIZE;
        if(buckets[index] == null){
            buckets[index] = new LinkedList>();
        }
        
        //查看key是否已经存在，如果存在则更新里面的value
        boolean isFound = false;
        LinkedList> bucket = buckets[index];
        for (MapEntry mapEntry : bucket) {
            if(mapEntry.getKey().equals(key)){
                isFound = true;
                oldVal = mapEntry.getValue();
                mapEntry.setValue(value);
                break;
            }
        }
        
        //不存在则把key和value存放到桶里面
        if(!isFound){
            MapEntry entry = new MapEntry(key, value);
            bucket.add(entry);
        }
        
        return oldVal;
    }
    
    public V get(Object key){
        V val = null;
        //对键进行hashCode计算
        int index = Math.abs(key.hashCode()) % SIZE;
        
        LinkedList> bucket = buckets[index];
        if(bucket != null){
            for (MapEntry mapEntry : bucket) {
                if(mapEntry.getKey().equals(key)){
                    val = mapEntry.getValue();
                    break;
                }
            }
        }
        
        return val;
    }
    
    @Override
    public  Set> entrySet() {
        Set> set = new HashSet>();
        for (LinkedList> bucket : buckets) {
            if(bucket == null) continue;
            for (MapEntry mapEntry : bucket) {
                set.add(mapEntry);
            }
        }
        
        return set;
    }
    
    public static void main(String[] args) {
        MyHashMap mobileMap = new MyHashMap();
        mobileMap.put(new Mobile2("Huawei",1000), 10);
        mobileMap.put(new Mobile2("OPPO",600), 40);
        mobileMap.put(new Mobile2("VIVO",800), 30);
        mobileMap.put(new Mobile2("Xiaomi",700), 20);
        mobileMap.put(new Mobile2("Huawei",1000), 20);//更新之前插入的对象
        
        System.out.println(mobileMap.entrySet());
        System.out.println(mobileMap.get(new Mobile2("Huawei",1000)));//查找插入的对象
        
        /**[OPPO the price is 600 = 40, Xiaomi the price is 700 = 20, VIVO the price is 800 = 30, Huawei the price is 1000 = 20]
            20
         * */
    }
}

综上所示，我们通过自己设计自己的HashMap了解了：

1.HashMap高效查找的原因及查找过程

2.明白了HashCode及equals方法的作用

3.我们用自己的类作为HashMap的键时需要注意重写hashCode及equals方法

以上部分内容来自《Java编程思想》