Counting bits set

Problem statement: given a 32-bit integer, write a function that returns the number of bits in the binary representation of the integer which are set.

For example,

count_bits(0x00000001) → 1<br /> count_bits(0xFFFFFFFF) → 32<br /> count_bits(0x10101010) → 4

Much credit is due to this page of bit twiddling hacks.

The naïve solution is to shift the input by one bit and check the last bit 32 times:

unsigned int count_bits_naive1 (unsigned int num)
{
    int count = 0;
    for (int i=0; i<32; i++) {
        count += (num >> i) & 1;
    }
    return count;
}

This version adds the optimization so that it stops once the leftmost set bit is reached. No need to continue counting if num == 0:

unsigned int count_bits_naive2 (unsigned int num)
{
    int count = 0;
    while (num) {
        count += (num & 1);
        num = num >> 1;
    }
    return count;
}

We can reduce the number of operations by pre-calculating the bit sum for all possible octets and then just looking at the 4 octets in a 32-bit integer:

static const unsigned char BitsSetTable256[256] =
{
#   define B2(n) n,     n+1,     n+1,     n+2
#   define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
#   define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
    B6(0), B6(1), B6(1), B6(2)
};
unsigned int count_bits_table (unsigned int num)
{
    return BitsSetTable256[num       & 0x000000FF] +
           BitsSetTable256[num >>  8 & 0x000000FF] +
           BitsSetTable256[num >> 16 & 0x000000FF] +
           BitsSetTable256[num >> 24             ];
}

Finally, we can count the bits by summing in parallel, 16 pairs of 1-bit integers, 8 pairs of 2-bit integers, 4 pairs of 4-bit integers, 2 pairs of 8-bit integers, and finally 1 pair of 16-bit integers:

static const unsigned int B[] = {
0b01010101010101010101010101010101,
0b00110011001100110011001100110011,
0b00001111000011110000111100001111,
0b00000000111111110000000011111111,
0b00000000000000001111111111111111
};
unsigned int count_bits_parallel (unsigned int num)
{
    num = (num & B[0]) + (num >> 1  & B[0]);
    num = (num & B[1]) + (num >> 2  & B[1]);
    num = (num & B[2]) + (num >> 4  & B[2]);
    num = (num & B[3]) + (num >> 8  & B[3]);
    num = (num & B[4]) + (num >> 16 & B[4]);
    return num;
}

This method as written has more instructions than the previous version but uses less memory.

Here’s the whole C program:

#include <stdio.h>

void ok(int);
unsigned int count_bits_naive1(unsigned int);
unsigned int count_bits_naive2(unsigned int);
unsigned int count_bits_table(unsigned int);
unsigned int count_bits_parallel(unsigned int);
void testgroup (unsigned int (*)(unsigned int));

int test_count = 0;

int main (int argc, const char * argv[])
{
    testgroup(&count_bits_naive1);
    testgroup(&count_bits_naive2);
    testgroup(&count_bits_table);
    testgroup(&count_bits_parallel);
    return 0;
}

unsigned int count_bits_naive1 (unsigned int num)
{
    int count = 0;
    for (int i=0; i<32; i++) {
        count += (num >> i) & 1;
    }
    return count;
}

// Stops once leftmost set bit is reached
unsigned int count_bits_naive2 (unsigned int num)
{
    int count = 0;
    while (num) {
        count += (num & 1);
        num = num >> 1;
    }
    return count;
}

// Using a pre-calculated table for all bytes
static const unsigned char BitsSetTable256[256] =
{
#   define B2(n) n,     n+1,     n+1,     n+2
#   define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
#   define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
    B6(0), B6(1), B6(1), B6(2)
};
unsigned int count_bits_table (unsigned int num)
{
    return BitsSetTable256[num       & 0x000000FF] +
           BitsSetTable256[num >>  8 & 0x000000FF] +
           BitsSetTable256[num >> 16 & 0x000000FF] +
           BitsSetTable256[num >> 24             ];
}

// Using summing in parallel
static const unsigned int B[] = {
0b01010101010101010101010101010101,
0b00110011001100110011001100110011,
0b00001111000011110000111100001111,
0b00000000111111110000000011111111,
0b00000000000000001111111111111111
};
unsigned int count_bits_parallel (unsigned int num)
{
    num = (num & B[0]) + (num >> 1  & B[0]);
    num = (num & B[1]) + (num >> 2  & B[1]);
    num = (num & B[2]) + (num >> 4  & B[2]);
    num = (num & B[3]) + (num >> 8  & B[3]);
    num = (num & B[4]) + (num >> 16 & B[4]);
    return num;
}

void testgroup (unsigned int count_bits(unsigned int))
{
    ok(count_bits(0xFFFFFFFF) == 32);
    ok(count_bits(1) == 1);
    ok(count_bits(0) == 0);
    ok(count_bits(0x10101010) == 4);
    ok(count_bits(0x01010101) == 4);
    ok(count_bits(0xFFFF0000) == 16);
    ok(count_bits(0x00FF00FF) == 16);
}

void ok (int test) {
    printf(test ? "ok %dn" : "NOT ok %dn", ++test_count);
}

Leave a Reply

Your email address will not be published.

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

 
To comment, click below to log in.