Bitset,也就是位图,由于可以用非常紧凑的格式来表示给定范围的连续数据而经常出现在各种算法设计中。上面的图来自c++库中bitset的一张图。
基本原理是,用1位来表示一个数据是否出现过,0为没有出现过,1表示出现过。使用用的时候既可根据某一个是否为0表示此数是否出现过。
一个1G的空间,有 8*1024*1024*1024=8.58*10^9bit,也就是可以表示85亿个不同的数。
常见的应用是那些需要对海量数据进行一些统计工作的时候,比如日志分析等。
面试题中也常出现,比如:统计40亿个数据中没有出现的数据,将40亿个不同数据进行排序等。
又如:现在有1千万个随机数,随机数的范围在1到1亿之间。现在要求写出一种算法,将1到1亿之间没有在随机数中的数求出来(百度)。
programming pearls上也有一个关于使用bitset来查找电话号码的题目。
Bitmap的常见扩展,是用2位或者更多为来表示此数字的更多信息,比如出现了多少次等。
package java.util; import java.io.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.LongBuffer; public class BitSet implements Cloneable, java.io.Serializable {、
private long[] words; .... ....
public BitSet() { initWords(BITS_PER_WORD); sizeIsSticky = false; } public BitSet(int nbits) { // nbits can't be negative; size 0 is OK if (nbits < 0) throw new NegativeArraySizeException("nbits < 0: " + nbits); initWords(nbits); private void initWords(int nbits) { words = new long[wordIndex(nbits-1) + 1]; } private static int wordIndex(int bitIndex) { return bitIndex >> ADDRESS_BITS_PER_WORD; } private final static int ADDRESS_BITS_PER_WORD = 6; private final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;
两个构造函数,分别是一个指定了初始大小,一个没指定。如果没指定,我们可以看到默认的初始大小为, 2^6 = 64-1=63 bit. 我们知道java中long的大小就是8个字节,也就是8*8=64bit。也就是说,bitset默认的是一个long整形的大小。初始化函数指定了必要的大小。
注意:如果指定了bitset的初始化大小,那么会把他规整到一个大于或者等于这个数字的64的整倍数。比如64位,bitset的大小是1个long,而65位时,bitset大小是2个long,即128位。做这么一个规定,主要是为了内存对齐,同时避免考虑到不要处理特殊情况,简化程序。
public void clear() { while (wordsInUse > 0) words[--wordsInUse] = 0; }b. 清空某一位
public void clear(int bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); int wordIndex = wordIndex(bitIndex); if (wordIndex >= wordsInUse) return; words[wordIndex] &= ~(1L << bitIndex); recalculateWordsInUse(); checkInvariants(); }
/** * Sets the bits from the specified {@code fromIndex} (inclusive) to the * specified {@code toIndex} (exclusive) to {@code false}. * * @param fromIndex index of the first bit to be cleared * @param toIndex index after the last bit to be cleared * @throws IndexOutOfBoundsException if {@code fromIndex} is negative, * or {@code toIndex} is negative, or {@code fromIndex} is * larger than {@code toIndex} * @since 1.4 */ public void clear(int fromIndex, int toIndex) { checkRange(fromIndex, toIndex); if (fromIndex == toIndex) return; int startWordIndex = wordIndex(fromIndex); if (startWordIndex >= wordsInUse) return; int endWordIndex = wordIndex(toIndex - 1); if (endWordIndex >= wordsInUse) { toIndex = length(); endWordIndex = wordsInUse - 1; } long firstWordMask = WORD_MASK << fromIndex; long lastWordMask = WORD_MASK >>> -toIndex; if (startWordIndex == endWordIndex) { // Case 1: One word words[startWordIndex] &= ~(firstWordMask & lastWordMask); } else { // Case 2: Multiple words // Handle first word words[startWordIndex] &= ~firstWordMask; // Handle intermediate words, if any for (int i = startWordIndex+1; i < endWordIndex; i++) words[i] = 0; // Handle last word words[endWordIndex] &= ~lastWordMask; } recalculateWordsInUse(); checkInvariants(); }方法是将这个范围分成三块,startword; interval words; stopword。
/** * Sets the field wordsInUse to the logical size in words of the bit set. * WARNING:This method assumes that the number of words actually in use is * less than or equal to the current value of wordsInUse! */ private void recalculateWordsInUse() { // Traverse the bitset until a used word is found int i; for (i = wordsInUse-1; i >= 0; i--) if (words[i] != 0) break; wordsInUse = i+1; // The new logical size }
/** * Every public method must preserve these invariants. */ private void checkInvariants() { assert(wordsInUse == 0 || words[wordsInUse - 1] != 0); assert(wordsInUse >= 0 && wordsInUse <= words.length); assert(wordsInUse == words.length || words[wordsInUse] == 0); }
/** * Sets the bit at the specified index to the complement of its * current value. * * @param bitIndex the index of the bit to flip * @throws IndexOutOfBoundsException if the specified index is negative * @since 1.4 */ public void flip(int bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); int wordIndex = wordIndex(bitIndex); expandTo(wordIndex); words[wordIndex] ^= (1L << bitIndex); recalculateWordsInUse(); checkInvariants(); }
/** * Ensures that the BitSet can accommodate a given wordIndex, * temporarily violating the invariants. The caller must * restore the invariants before returning to the user, * possibly using recalculateWordsInUse(). * @param wordIndex the index to be accommodated. */ private void expandTo(int wordIndex) { int wordsRequired = wordIndex+1; if (wordsInUse < wordsRequired) { ensureCapacity(wordsRequired); wordsInUse = wordsRequired; } } /** * Ensures that the BitSet can hold enough words. * @param wordsRequired the minimum acceptable number of words. */ private void ensureCapacity(int wordsRequired) { if (words.length < wordsRequired) { // Allocate larger of doubled size or required size int request = Math.max(2 * words.length, wordsRequired); words = Arrays.copyOf(words, request); sizeIsSticky = false; } }
public void flip(int fromIndex, int toIndex) { checkRange(fromIndex, toIndex); if (fromIndex == toIndex) return; int startWordIndex = wordIndex(fromIndex); int endWordIndex = wordIndex(toIndex - 1); expandTo(endWordIndex); long firstWordMask = WORD_MASK << fromIndex; long lastWordMask = WORD_MASK >>> -toIndex; if (startWordIndex == endWordIndex) { // Case 1: One word words[startWordIndex] ^= (firstWordMask & lastWordMask); } else { // Case 2: Multiple words // Handle first word words[startWordIndex] ^= firstWordMask; // Handle intermediate words, if any for (int i = startWordIndex+1; i < endWordIndex; i++) words[i] ^= WORD_MASK; // Handle last word words[endWordIndex] ^= lastWordMask; } recalculateWordsInUse(); checkInvariants(); }
/** * Sets the bit at the specified index to {@code true}. * * @param bitIndex a bit index * @throws IndexOutOfBoundsException if the specified index is negative * @since JDK1.0 */ public void set(int bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); int wordIndex = wordIndex(bitIndex); expandTo(wordIndex); words[wordIndex] |= (1L << bitIndex); // Restores invariants checkInvariants(); }
public void set(int bitIndex, boolean value) { if (value) set(bitIndex); else clear(bitIndex); }
/** * Returns the value of the bit with the specified index. The value * is {@code true} if the bit with the index {@code bitIndex} * is currently set in this {@code BitSet}; otherwise, the result * is {@code false}. * * @param bitIndex the bit index * @return the value of the bit with the specified index * @throws IndexOutOfBoundsException if the specified index is negative */ public boolean get(int bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); checkInvariants(); int wordIndex = wordIndex(bitIndex); return (wordIndex < wordsInUse) && ((words[wordIndex] & (1L << bitIndex)) != 0); }
public BitSet get(int fromIndex, int toIndex) { checkRange(fromIndex, toIndex); checkInvariants(); int len = length(); // If no set bits in range return empty bitset if (len <= fromIndex || fromIndex == toIndex) return new BitSet(0); // An optimization if (toIndex > len) toIndex = len; BitSet result = new BitSet(toIndex - fromIndex); int targetWords = wordIndex(toIndex - fromIndex - 1) + 1; int sourceIndex = wordIndex(fromIndex); boolean wordAligned = ((fromIndex & BIT_INDEX_MASK) == 0); // Process all words but the last word for (int i = 0; i < targetWords - 1; i++, sourceIndex++) result.words[i] = wordAligned ? words[sourceIndex] : (words[sourceIndex] >>> fromIndex) | (words[sourceIndex+1] << -fromIndex); // Process the last word long lastWordMask = WORD_MASK >>> -toIndex; result.words[targetWords - 1] = ((toIndex-1) & BIT_INDEX_MASK) < (fromIndex & BIT_INDEX_MASK) ? /* straddles source words */ ((words[sourceIndex] >>> fromIndex) | (words[sourceIndex+1] & lastWordMask) << -fromIndex) : ((words[sourceIndex] & lastWordMask) >>> fromIndex); // Set wordsInUse correctly result.wordsInUse = targetWords; result.recalculateWordsInUse(); result.checkInvariants(); return result; }
/** * Returns the "logical size" of this {@code BitSet}: the index of * the highest set bit in the {@code BitSet} plus one. Returns zero * if the {@code BitSet} contains no set bits. * * @return the logical size of this {@code BitSet} * @since 1.2 */ public int length() { if (wordsInUse == 0) return 0; return BITS_PER_WORD * (wordsInUse - 1) + (BITS_PER_WORD - Long.numberOfLeadingZeros(words[wordsInUse - 1])); }
/** * Returns the hash code value for this bit set. The hash code depends * Note that the hash code changes if the set of bits is altered. * * @return the hash code value for this bit set */ public int hashCode() { long h = 1234; for (int i = wordsInUse; --i >= 0; ) h ^= words[i] * (i + 1); return (int)((h >> 32) ^ h); }