2016年7月29日金曜日

Trie

トライ木と呼ばれるデータ構造を作成します.
retrievalからtrieなので, treeと同じ発音だそうですが, 日本語ではトライだそうです.
各ノードがそれぞれラベルを持ち, 部分木は共通の接頭辞を持ちます.
"add", "and", "ans"からなるトライ木は以下のようになります.

前処理のための中間データとして作成したため, 効率は考慮していません.
namespace
{
    class ByteTrie
    {
    public:
        typedef Tree<Char> tree_type;
        typedef Tree<Char>::node_type node_type;

        ByteTrie();
        ~ByteTrie();

        bool add(const Char* key, const Char* value);
        const Char* find(const Char* key) const;

        Char getLabel(const node_type& node) const
        {
            return node.getLabel();
        }

        const Char* getValue(const node_type& node) const
        {
            return (0<=node.getIndex())? values_[node.getIndex()] : NULL;
        }

        template<class T>
        void traverse(T& proc) const;
    private:
        ByteTrie(const ByteTrie&);
        ByteTrie& operator=(const ByteTrie&);

        tree_type tree_;
        ArrayPOD<Char*> values_;
    };

    template<class T>
    void ByteTrie::traverse(T& proc) const
    {
        const node_type* node = tree_.getRoot();

        ArrayPOD<s32> indices;
        indices.push_front(0);
        s32 count = 0;
        while(0<indices.size()){
            s32 index = indices[indices.size()-1];
            indices.pop_back();
            node = tree.get(index);
            for(s32 i=0; i<node->getNumChildren(); ++i){
                indices.push_front(node->getChild(i));
            }
            if(false == proc(count, *node)){
                break;
            }
            ++count;
        }
    }

    ByteTrie::ByteTrie()
    {
        node_type* node = tree_.getRoot();
        node->setLabel(0);
    }

    ByteTrie::~ByteTrie()
    {
        for(s32 i=0; i<values_.size(); ++i){
            free(values_[i]);
            values_[i] = NULL;
        }
        values_.clear();
    }

    bool ByteTrie::add(const Char* key, const Char* value)
    {
        assert(NULL != key);
        assert(NULL != value);

        node_type* node = tree_.getRoot();
        while('\0' != *key){
            node_type* next = NULL;
            for(s32 i=0; i<node->getNumChildren(); ++i){
                node_type* child = tree_.get(node->getChild(i));
                if(child->getLabel() == *key){
                    next = child;
                    break;
                }
            }
            if(NULL == next){
                next = tree_.add(node);
                next->setLabel(*key);
            }
            node = next;
            ++key;
        }

        s32 len = strlen(value) + 1;
        Char* newValue = (Char*)malloc(sizeof(Char)*len);
        memcpy(newValue, value, sizeof(Char)*len);

        s32 index = node->getIndex();
        if(0<=index){
            free(values_[index]);
            values_[index] = newValue;
        }else{
            index = values_.size();
            values_.push_back(newValue);
        }
        node->setIndex(index);
        return true;
    }

    const Char* ByteTrie::find(const Char* key) const
    {
        assert(NULL != key);
        const node_type* node = tree_.getRoot();

        while('\0' != *key){
            const node_type* next = NULL;
            for(s32 i=0; i<node->getNumChildren(); ++i){
                const node_type* child = tree_.get(node->getChild(i));
                if(child->getLabel() == *key){
                    next = child;
                    break;
                }
            }
            if(NULL == next){
                return NULL;
            }
            node = next;
            ++key;
        }
        return (0<=node->getIndex())? values_[node->getIndex()] : NULL;
    }
}

ArrayPODやNode, Treeクラスについても一応あげておきます.
こちらも効率は考えていません.
namespace
{
    template<class T>
    class ArrayPOD
    {
    public:
        typedef ArrayPOD<T> this_type;
        typedef T value_type;

        ArrayPOD()
            :capacity_(0)
            ,size_(0)
            ,data_(NULL)
        {}

        ~ArrayPOD()
        {
            free(data_);
        }

        s32 size() const{ return size_;}
        void clear(){ size_ = 0;}

        void push_front(const value_type& x);
        void push_back(const value_type& x);
        void pop_front();
        void pop_back();

        const value_type& operator[](s32 index) const{ return data_[index];}
        value_type& operator[](s32 index){ return data_[index];}
    private:
        ArrayPOD(const this_type&);
        ArrayPOD& operator=(const this_type&);

        void resize();

        s32 capacity_;
        s32 size_;
        value_type* data_;
    };

    template<class T>
    void ArrayPOD<T>::push_front(const value_type& x)
    {
        if(capacity_<=size_){
            resize();
        }
        for(s32 i=size_; 1<=i; --i){
            data_[i] = data_[i-1];
        }
        ++size_;
        data_[0] = x;
    }

    template<class T>
    void ArrayPOD<T>::push_back(const value_type& x)
    {
        if(capacity_<=size_){
            resize();
        }
        data_[size_] = x;
        ++size_;
    }

    template<class T>
    void ArrayPOD<T>::pop_front()
    {
        if(size_<=0){
            return;
        }
        for(s32 i=1; i<size_; ++i){
            data_[i-1] = data_[i];
        }
        --size_;
    }

    template<class T>
    void ArrayPOD<T>::pop_back()
    {
        if(size_<=0){
            return;
        }
        --size_;
    }

    template<class T>
    void ArrayPOD<T>::resize()
    {
        s32 capacity = size_ + 16;
        value_type* data = (value_type*)malloc(sizeof(value_type)*capacity);
        memcpy(data, data_, sizeof(value_type)*size_);
        free(data_);
        capacity_ = capacity;
        data_ = data;
    }

    //------------------------------------------------------------
    //---
    //---
    //---
    //------------------------------------------------------------
    template<class T>
    class Node
    {
    public:
        Node();
        Node(const Node& node);
        ~Node();

        void add(s32 child);
        inline s32 getNumChildren() const;
        inline s32 getChild(s32 index) const;

        s32 getIndex() const{ return index_;}
        void setIndex(s32 index){ index_ = index;}

        const T& getLabel() const{ return data_;}
        void setLabel(const T& data){ data_ = data;}
    private:
        Node& operator=(const Node&);

        s32* children_;
        s16 numCapacity_;
        s16 numChildren_;
        s32 index_;
        T data_;
    };

    template<class T>
    Node<T>::Node()
        :children_(NULL)
        ,numCapacity_(0)
        ,numChildren_(0)
        ,index_(-1)
    {}

    template<class T>
    Node<T>::Node(const Node& node)
        :numCapacity_(node.numCapacity_)
        ,numChildren_(node.numChildren_)
    {
        children_ = (s32*)malloc(sizeof(s32)*numCapacity_);
        memcpy(children_, node.children_, sizeof(s32)*numCapacity_);
        index_ = node.index_;
        data_ = node.data_;
    }

    template<class T>
    Node<T>::~Node()
    {
        free(children_);
        children_ = NULL;
    }

    template<class T>
    void Node<T>::add(s32 child)
    {
        if(numCapacity_<=numChildren_){
            s32 newCapacity = numCapacity_+4;
            s32* children = (s32*)malloc(sizeof(s32)*newCapacity);
            memcpy(children, children_, sizeof(s32)*numCapacity_);
            free(children_);
            numCapacity_ = newCapacity;
            children_ = children;
        }
        children_[numChildren_] = child;
        ++numChildren_;
    }

    template<class T>
    inline s32 Node<T>::getNumChildren() const
    {
        return numChildren_;
    }

    template<class T>
    inline s32 Node<T>::getChild(s32 index) const
    {
        return children_[index];
    }

    //------------------------------------------------------------
    //---
    //---
    //---
    //------------------------------------------------------------
    template<class T>
    class Tree
    {
    public:
        typedef Node<T> node_type;

        Tree();
        ~Tree();

        const node_type* getRoot() const;
        node_type* getRoot();
        node_type* add(node_type* parent);
        const node_type* get(s32 index) const;
        node_type* get(s32 index);

    private:
        Tree(const Tree&);
        Tree& operator=(const Tree&);

        node_type* addRoot();
        void resize();

        s32 capacity_;
        s32 size_;
        node_type* nodes_;
    };

    template<class T>
    Tree<T>::Tree()
        :capacity_(0)
        ,size_(0)
        ,nodes_(NULL)
    {
        addRoot();
    }

    template<class T>
    Tree<T>::~Tree()
    {
        for(s32 i=0; i<capacity_; ++i){
            nodes_[i].~node_type();
        }
        free(nodes_);
        nodes_ = NULL;
    }

    template<class T>
    typename Tree<T>::node_type* Tree<T>::addRoot()
    {
        if(0<size_){
            return getRoot();
        }
        resize();
        size_ = 1;
        return &nodes_[0];
    }

    template<class T>
    const typename Tree<T>::node_type* Tree<T>::getRoot() const
    {
        return (size_<=0)? NULL : &nodes_[0];
    }

    template<class T>
    typename Tree<T>::node_type* Tree<T>::getRoot()
    {
        return (size_<=0)? NULL : &nodes_[0];
    }

    template<class T>
    typename Tree<T>::node_type* Tree<T>::add(node_type* parent)
    {
        assert(NULL != parent);
        if(capacity_<=size_){
            resize();
        }
        parent->add(size_);
        node_type* node = nodes_ + size_;
        ++size_;
        return node;
    }

    template<class T>
    const typename Tree<T>::node_type* Tree<T>::get(s32 index) const
    {
        assert(0<=index && index<size_);
        return &nodes_[index];
    }

    template<class T>
    typename Tree<T>::node_type* Tree<T>::get(s32 index)
    {
        assert(0<=index && index<size_);
        return &nodes_[index];
    }

    template<class T>
    void Tree<T>::resize()
    {
        s32 capacity = capacity_ + 16;
        node_type* nodes = (node_type*)malloc(sizeof(node_type)*capacity);
        for(s32 i=0; i<capacity_; ++i){
            new(&nodes[i]) node_type(nodes_[i]);
            nodes_[i].~node_type();
        }
        for(s32 i=capacity_; i<capacity; ++i){
            new(&nodes[i]) node_type();
        }
        free(nodes_);
        capacity_ = capacity;
        nodes_ = nodes;
    }
}

0 件のコメント:

コメントを投稿