retrievalからtrieなので, treeと同じ発音だそうですが, 日本語ではトライだそうです.
各ノードがそれぞれラベルを持ち, 部分木は共通の接頭辞を持ちます.
"add", "and", "ans"からなるトライ木は以下のようになります.
前処理のための中間データとして作成したため, 効率は考慮していません.
namespace { class ByteTrie { public: typedef Tree<Char> tree_type; typedef Tree<Char>::node_type node_type; ByteTrie(); ~ByteTrie(); bool add(const Char* key, const Char* value); const Char* find(const Char* key) const; Char getLabel(const node_type& node) const { return node.getLabel(); } const Char* getValue(const node_type& node) const { return (0<=node.getIndex())? values_[node.getIndex()] : NULL; } template<class T> void traverse(T& proc) const; private: ByteTrie(const ByteTrie&); ByteTrie& operator=(const ByteTrie&); tree_type tree_; ArrayPOD<Char*> values_; }; template<class T> void ByteTrie::traverse(T& proc) const { const node_type* node = tree_.getRoot(); ArrayPOD<s32> indices; indices.push_front(0); s32 count = 0; while(0<indices.size()){ s32 index = indices[indices.size()-1]; indices.pop_back(); node = tree.get(index); for(s32 i=0; i<node->getNumChildren(); ++i){ indices.push_front(node->getChild(i)); } if(false == proc(count, *node)){ break; } ++count; } } ByteTrie::ByteTrie() { node_type* node = tree_.getRoot(); node->setLabel(0); } ByteTrie::~ByteTrie() { for(s32 i=0; i<values_.size(); ++i){ free(values_[i]); values_[i] = NULL; } values_.clear(); } bool ByteTrie::add(const Char* key, const Char* value) { assert(NULL != key); assert(NULL != value); node_type* node = tree_.getRoot(); while('\0' != *key){ node_type* next = NULL; for(s32 i=0; i<node->getNumChildren(); ++i){ node_type* child = tree_.get(node->getChild(i)); if(child->getLabel() == *key){ next = child; break; } } if(NULL == next){ next = tree_.add(node); next->setLabel(*key); } node = next; ++key; } s32 len = strlen(value) + 1; Char* newValue = (Char*)malloc(sizeof(Char)*len); memcpy(newValue, value, sizeof(Char)*len); s32 index = node->getIndex(); if(0<=index){ free(values_[index]); values_[index] = newValue; }else{ index = values_.size(); values_.push_back(newValue); } node->setIndex(index); return true; } const Char* ByteTrie::find(const Char* key) const { assert(NULL != key); const node_type* node = tree_.getRoot(); while('\0' != *key){ const node_type* next = NULL; for(s32 i=0; i<node->getNumChildren(); ++i){ const node_type* child = tree_.get(node->getChild(i)); if(child->getLabel() == *key){ next = child; break; } } if(NULL == next){ return NULL; } node = next; ++key; } return (0<=node->getIndex())? values_[node->getIndex()] : NULL; } }
ArrayPODやNode, Treeクラスについても一応あげておきます.
こちらも効率は考えていません.
namespace { template<class T> class ArrayPOD { public: typedef ArrayPOD<T> this_type; typedef T value_type; ArrayPOD() :capacity_(0) ,size_(0) ,data_(NULL) {} ~ArrayPOD() { free(data_); } s32 size() const{ return size_;} void clear(){ size_ = 0;} void push_front(const value_type& x); void push_back(const value_type& x); void pop_front(); void pop_back(); const value_type& operator[](s32 index) const{ return data_[index];} value_type& operator[](s32 index){ return data_[index];} private: ArrayPOD(const this_type&); ArrayPOD& operator=(const this_type&); void resize(); s32 capacity_; s32 size_; value_type* data_; }; template<class T> void ArrayPOD<T>::push_front(const value_type& x) { if(capacity_<=size_){ resize(); } for(s32 i=size_; 1<=i; --i){ data_[i] = data_[i-1]; } ++size_; data_[0] = x; } template<class T> void ArrayPOD<T>::push_back(const value_type& x) { if(capacity_<=size_){ resize(); } data_[size_] = x; ++size_; } template<class T> void ArrayPOD<T>::pop_front() { if(size_<=0){ return; } for(s32 i=1; i<size_; ++i){ data_[i-1] = data_[i]; } --size_; } template<class T> void ArrayPOD<T>::pop_back() { if(size_<=0){ return; } --size_; } template<class T> void ArrayPOD<T>::resize() { s32 capacity = size_ + 16; value_type* data = (value_type*)malloc(sizeof(value_type)*capacity); memcpy(data, data_, sizeof(value_type)*size_); free(data_); capacity_ = capacity; data_ = data; } //------------------------------------------------------------ //--- //--- //--- //------------------------------------------------------------ template<class T> class Node { public: Node(); Node(const Node& node); ~Node(); void add(s32 child); inline s32 getNumChildren() const; inline s32 getChild(s32 index) const; s32 getIndex() const{ return index_;} void setIndex(s32 index){ index_ = index;} const T& getLabel() const{ return data_;} void setLabel(const T& data){ data_ = data;} private: Node& operator=(const Node&); s32* children_; s16 numCapacity_; s16 numChildren_; s32 index_; T data_; }; template<class T> Node<T>::Node() :children_(NULL) ,numCapacity_(0) ,numChildren_(0) ,index_(-1) {} template<class T> Node<T>::Node(const Node& node) :numCapacity_(node.numCapacity_) ,numChildren_(node.numChildren_) { children_ = (s32*)malloc(sizeof(s32)*numCapacity_); memcpy(children_, node.children_, sizeof(s32)*numCapacity_); index_ = node.index_; data_ = node.data_; } template<class T> Node<T>::~Node() { free(children_); children_ = NULL; } template<class T> void Node<T>::add(s32 child) { if(numCapacity_<=numChildren_){ s32 newCapacity = numCapacity_+4; s32* children = (s32*)malloc(sizeof(s32)*newCapacity); memcpy(children, children_, sizeof(s32)*numCapacity_); free(children_); numCapacity_ = newCapacity; children_ = children; } children_[numChildren_] = child; ++numChildren_; } template<class T> inline s32 Node<T>::getNumChildren() const { return numChildren_; } template<class T> inline s32 Node<T>::getChild(s32 index) const { return children_[index]; } //------------------------------------------------------------ //--- //--- //--- //------------------------------------------------------------ template<class T> class Tree { public: typedef Node<T> node_type; Tree(); ~Tree(); const node_type* getRoot() const; node_type* getRoot(); node_type* add(node_type* parent); const node_type* get(s32 index) const; node_type* get(s32 index); private: Tree(const Tree&); Tree& operator=(const Tree&); node_type* addRoot(); void resize(); s32 capacity_; s32 size_; node_type* nodes_; }; template<class T> Tree<T>::Tree() :capacity_(0) ,size_(0) ,nodes_(NULL) { addRoot(); } template<class T> Tree<T>::~Tree() { for(s32 i=0; i<capacity_; ++i){ nodes_[i].~node_type(); } free(nodes_); nodes_ = NULL; } template<class T> typename Tree<T>::node_type* Tree<T>::addRoot() { if(0<size_){ return getRoot(); } resize(); size_ = 1; return &nodes_[0]; } template<class T> const typename Tree<T>::node_type* Tree<T>::getRoot() const { return (size_<=0)? NULL : &nodes_[0]; } template<class T> typename Tree<T>::node_type* Tree<T>::getRoot() { return (size_<=0)? NULL : &nodes_[0]; } template<class T> typename Tree<T>::node_type* Tree<T>::add(node_type* parent) { assert(NULL != parent); if(capacity_<=size_){ resize(); } parent->add(size_); node_type* node = nodes_ + size_; ++size_; return node; } template<class T> const typename Tree<T>::node_type* Tree<T>::get(s32 index) const { assert(0<=index && index<size_); return &nodes_[index]; } template<class T> typename Tree<T>::node_type* Tree<T>::get(s32 index) { assert(0<=index && index<size_); return &nodes_[index]; } template<class T> void Tree<T>::resize() { s32 capacity = capacity_ + 16; node_type* nodes = (node_type*)malloc(sizeof(node_type)*capacity); for(s32 i=0; i<capacity_; ++i){ new(&nodes[i]) node_type(nodes_[i]); nodes_[i].~node_type(); } for(s32 i=capacity_; i<capacity; ++i){ new(&nodes[i]) node_type(); } free(nodes_); capacity_ = capacity; nodes_ = nodes; } }
0 件のコメント:
コメントを投稿