2016年7月30日土曜日

完備辞書

完備辞書そのものの説明は, ネット検索か,
高速文字列解析の世界<Amazon>で確認してください.

ビット列$B[0,n]\ B[i]\in\{0,1\}$
について, 以下の操作を実現する.
$access(B,i)\ B[i]$を返す
$rank_{b}(B,i)\ B[0,i)$中の$b\in\{0,1\}$の数を返す
$select_{b}(B,i)\ B$中の$i+1$番目に出現した$b\in\{0,1\}$の位置を返す
namespace
{
    class FullyIndexableDictionary
    {
    public:
        static const u32 LShift = 8;
        static const u32 LSize = 256;
        static const u32 SShift = 3;
        static const u32 SSize = 8;
        static const u32 NumSInL = LSize/SSize;

        FullyIndexableDictionary();
        explicit FullyIndexableDictionary(u32 sizeInBits);
        ~FullyIndexableDictionary();

        void clear();
        inline u32 sizeInBits() const;

        void build();

        u32 access(u32 index) const;

        void set(u32 index);
        void reset(u32 index);

        void add(u32 index, bool flag);
        void add(bool flag)
        {
            add(sizeInBits_, flag);
        }

        u32 rank_one(s32 i) const;
        u32 rank_zero(s32 i) const;
        u32 select_one(s32 i) const;
        u32 select_zero(s32 i) const;

        u32 rank_one_brute_force(s32 i) const;
        u32 rank_zero_brute_force(s32 i) const;
        u32 select_one_brute_force(s32 i) const;
        u32 select_zero_brute_force(s32 i) const;
    private:
        FullyIndexableDictionary(const FullyIndexableDictionary&);
        FullyIndexableDictionary& operator=(const FullyIndexableDictionary&);

        static const u32 UnitBytes = 16;
        static const u32 UnitBits = UnitBytes<<3;

        u32 popcount(const u32 v[8]) const;

        u32 sizeInBits_;
        u32 size_;
        u8* bits_;
        u32 sizeL_;
        u32* L1_;
        u32 sizeS_;
        u8* S1_;
    };

    inline u32 FullyIndexableDictionary::sizeInBits() const
    {
        return sizeInBits_;
    }

    template<class T>
    inline s32 lower(s32 N, const T* v, const T& x)
    {
        s32 first=0;
        s32 n = N;
        while(1<n){
            s32 d = n>>1;
            s32 m = first+d;
            if(v[m]<x){
                first = m;
                n -= d;
            }else{
                n = d;
            }
        }
        return (N<=first)? N-1 : first;
    }

    template<class T>
    inline s32 lower(s32 N, const T* v, s32 BlockSize, const T& x)
    {
        s32 first=0;
        s32 n = N;
        while(1<n){
            s32 d = n>>1;
            s32 m = first+d;
            s32 blockSize = m*BlockSize;
            if((blockSize-v[m])<x){
                first = m;
                n -= d;
            }else{
                n = d;
            }
        }
        return (N<=first)? N-1 : first;
    }

    FullyIndexableDictionary::FullyIndexableDictionary()
        :sizeInBits_(0)
        ,size_(0)
        ,bits_(NULL)
        ,sizeL_(0)
        ,L1_(NULL)
        ,sizeS_(0)
        ,S1_(NULL)
    {
    }

    FullyIndexableDictionary::FullyIndexableDictionary(u32 sizeInBits)
        :sizeL_(0)
        ,L1_(NULL)
        ,sizeS_(0)
        ,S1_(NULL)
    {
        u32 sizeInBytes = (sizeInBits<UnitBits)? UnitBytes : ((sizeInBits>>3) + UnitBytes)&~(UnitBytes-1);
        sizeInBits_ = sizeInBits;
        size_ = sizeInBytes;
        bits_ = static_cast<u8*>(malloc(sizeInBytes));
        memset(bits_, 0, sizeInBytes);
    }

    FullyIndexableDictionary::~FullyIndexableDictionary()
    {
        free(L1_); L1_ = NULL;
        free(bits_); bits_ = NULL;
    }

    void FullyIndexableDictionary::clear()
    {
        memset(bits_, 0, size_);
    }

    void FullyIndexableDictionary::build()
    {
        sizeL_ = (sizeInBits_+(LSize-1))>>LShift;
        sizeS_ = NumSInL*sizeL_;
        free(L1_);
        L1_ = static_cast<u32*>(malloc(sizeL_*sizeof(u32) + sizeS_*sizeof(u8)));
        S1_ = reinterpret_cast<u8*>(L1_+sizeL_);

        L1_[0] = 0;
        u8* b = bits_;
        for(u32 l=1; l<sizeL_; ++l,b+=32){
            u32 count = popcount(reinterpret_cast<u32*>(b));
            L1_[l] = L1_[l-1] + count;
        }

        for(u32 l=0; l<sizeL_; ++l){
            u32 ss = l*NumSInL;
            S1_[ss] = 0;
            ++ss;
            for(u32 s=1; s<NumSInL; ++s,++ss){
                u8 count = population_count(bits_[ss-1]);
                S1_[ss] = S1_[ss-1] + count;
            }
        }
    }

    u32 FullyIndexableDictionary::access(u32 index) const
    {
        u32 i = index>>3;
        assert(0<=i && i<size_);
        u32 bit = 0x01U << (index&7);
        return (bit & bits_[i]);
    }

    void FullyIndexableDictionary::set(u32 index)
    {
        u32 i = index>>3;
        assert(0<=i && i<size_);
        u32 bit = 0x01U << (index&7);
        bits_[i] |= bit;
    }

    void FullyIndexableDictionary::reset(u32 index)
    {
        u32 i = index>>3;
        assert(0<=i && i<size_);
        u32 bit = 0x01U << (index&7);
        bits_[i] &= ~bit;
    }

    void FullyIndexableDictionary::add(u32 index, bool flag)
    {
        u32 i = index>>3;
        if(size_<=i){
            u32 oldSizeInBytes = size_;
            u32 sizeInBytes = i;
            sizeInBytes = (sizeInBytes+UnitBytes)&~(UnitBytes-1);
            u8* bits = static_cast<u8*>(malloc(sizeInBytes));
            memset(bits+oldSizeInBytes, 0, sizeInBytes-oldSizeInBytes);
            memcpy(bits, bits_, oldSizeInBytes);
            free(bits_);
            size_ = sizeInBytes;
            bits_ = reinterpret_cast<u8*>(bits);
        }
        if(sizeInBits_<=index){
            sizeInBits_ = index+1;
        }
        u32 bit = 0x01U << (index&7);
        if(flag){
            bits_[i] |= bit;
        } else{
            bits_[i] &= ~bit;
        }
    }

    u32 FullyIndexableDictionary::rank_one(s32 i) const
    {
        assert(0<=i && i<=sizeInBits());
        u32 b = i>>3;
        u32 l = i>>LShift;
        u32 r0 = i&(LSize-1);
        u32 s = r0>>SShift;
        u32 r1 = r0&(SSize-1);
        s += l*NumSInL;

        u32 mask = (0x01U<<r1)-1;
        return L1_[l]+S1_[s]+population_count(bits_[b]&mask);
    }

    u32 FullyIndexableDictionary::rank_zero(s32 i) const
    {
        assert(0<=i && i<=sizeInBits());
        u32 b = i>>3;
        u32 l = i>>LShift;
        u32 r0 = i&(LSize-1);
        u32 s0 = r0>>SShift;
        u32 r1 = r0&(SSize-1);
        u32 s = s0 + l*NumSInL;

        u32 mask = (0x01U<<r1)-1;
        u32 L = (LSize)*l-L1_[l];
        u32 S = (SSize)*s0-S1_[s];
        return L+S+population_count((~bits_[b])&mask);
    }

    u32 FullyIndexableDictionary::select_one(s32 i) const
    {
        ++i;
        u32 l = lower(sizeL_, L1_, (u32)i);
        i -= L1_[l];
        assert(i<=255);
        s32 offset = NumSInL*l;
        const u8* S = S1_ + offset;
        u32 s = lower(NumSInL, S, static_cast<u8>(i));
        i -= S[s];
        offset += s;

        u8 b = bits_[offset];
        for(s32 j=0; j<SSize; ++j){
            if(b&0x01U){
                --i;
                if(i==0){
                    return (offset<<3)+j;
                }
            }
            b>>=1;
        }
        return sizeInBits();
    }

    u32 FullyIndexableDictionary::select_zero(s32 i) const
    {
        ++i;
        u32 l = lower(sizeL_, L1_, LSize, (u32)i);
        i -= L1_[l];
        assert(i<=255);
        s32 offset = NumSInL*l;
        const u8* S = S1_ + offset;
        u32 s = lower(NumSInL, S, SSize, static_cast<u8>(i));
        i -= S[s];
        offset += s;

        u8 b = bits_[offset];
        for(s32 j=0; j<SSize; ++j){
            if(0 == (b&0x01U)){
                --i;
                if(i==0){
                    return (offset<<3)+j;
                }
            }
            b>>=1;
        }
        return sizeInBits();
    }

    u32 FullyIndexableDictionary::rank_one_brute_force(s32 i) const
    {
        assert(0<=i && i<=sizeInBits());
        u32 count=0;
        for(u32 j=0; j<i; ++j){
            if(access(j)){
                ++count;
            }
        }
        return count;
    }

    u32 FullyIndexableDictionary::rank_zero_brute_force(s32 i) const
    {
        assert(0<=i && i<=sizeInBits());
        u32 count=0;
        for(u32 j=0; j<i; ++j){
            if(0==access(j)){
                ++count;
            }
        }
        return count;
    }

    u32 FullyIndexableDictionary::select_one_brute_force(s32 i) const
    {
        ++i;
        u32 count=0;
        u32 size = sizeInBits();
        for(u32 j=0; j<size; ++j){
            if(access(j)){
                ++count;
                if(count == i){
                    return j;
                }
            }
        }
        return size;
    }

    u32 FullyIndexableDictionary::select_zero_brute_force(s32 i) const
    {
        ++i;
        u32 count=0;
        u32 size = sizeInBits();
        for(u32 j=0; j<size; ++j){
            if(0==access(j)){
                ++count;
                if(count == i){
                    return j;
                }
            }
        }
        return size;
    }

    u32 FullyIndexableDictionary::popcount(const u32 v[8]) const
    {
        u32 count = 0;
        for(u32 i=0; i<8; ++i){
            count += population_count(v[i]);
        }
        return count;
    }
}

0 件のコメント:

コメントを投稿