retrievalからtrieなので, treeと同じ発音だそうですが, 日本語ではトライだそうです.
各ノードがそれぞれラベルを持ち, 部分木は共通の接頭辞を持ちます.
"add", "and", "ans"からなるトライ木は以下のようになります.
前処理のための中間データとして作成したため, 効率は考慮していません.
namespace
{
class ByteTrie
{
public:
typedef Tree<Char> tree_type;
typedef Tree<Char>::node_type node_type;
ByteTrie();
~ByteTrie();
bool add(const Char* key, const Char* value);
const Char* find(const Char* key) const;
Char getLabel(const node_type& node) const
{
return node.getLabel();
}
const Char* getValue(const node_type& node) const
{
return (0<=node.getIndex())? values_[node.getIndex()] : NULL;
}
template<class T>
void traverse(T& proc) const;
private:
ByteTrie(const ByteTrie&);
ByteTrie& operator=(const ByteTrie&);
tree_type tree_;
ArrayPOD<Char*> values_;
};
template<class T>
void ByteTrie::traverse(T& proc) const
{
const node_type* node = tree_.getRoot();
ArrayPOD<s32> indices;
indices.push_front(0);
s32 count = 0;
while(0<indices.size()){
s32 index = indices[indices.size()-1];
indices.pop_back();
node = tree.get(index);
for(s32 i=0; i<node->getNumChildren(); ++i){
indices.push_front(node->getChild(i));
}
if(false == proc(count, *node)){
break;
}
++count;
}
}
ByteTrie::ByteTrie()
{
node_type* node = tree_.getRoot();
node->setLabel(0);
}
ByteTrie::~ByteTrie()
{
for(s32 i=0; i<values_.size(); ++i){
free(values_[i]);
values_[i] = NULL;
}
values_.clear();
}
bool ByteTrie::add(const Char* key, const Char* value)
{
assert(NULL != key);
assert(NULL != value);
node_type* node = tree_.getRoot();
while('\0' != *key){
node_type* next = NULL;
for(s32 i=0; i<node->getNumChildren(); ++i){
node_type* child = tree_.get(node->getChild(i));
if(child->getLabel() == *key){
next = child;
break;
}
}
if(NULL == next){
next = tree_.add(node);
next->setLabel(*key);
}
node = next;
++key;
}
s32 len = strlen(value) + 1;
Char* newValue = (Char*)malloc(sizeof(Char)*len);
memcpy(newValue, value, sizeof(Char)*len);
s32 index = node->getIndex();
if(0<=index){
free(values_[index]);
values_[index] = newValue;
}else{
index = values_.size();
values_.push_back(newValue);
}
node->setIndex(index);
return true;
}
const Char* ByteTrie::find(const Char* key) const
{
assert(NULL != key);
const node_type* node = tree_.getRoot();
while('\0' != *key){
const node_type* next = NULL;
for(s32 i=0; i<node->getNumChildren(); ++i){
const node_type* child = tree_.get(node->getChild(i));
if(child->getLabel() == *key){
next = child;
break;
}
}
if(NULL == next){
return NULL;
}
node = next;
++key;
}
return (0<=node->getIndex())? values_[node->getIndex()] : NULL;
}
}
ArrayPODやNode, Treeクラスについても一応あげておきます.
こちらも効率は考えていません.
namespace
{
template<class T>
class ArrayPOD
{
public:
typedef ArrayPOD<T> this_type;
typedef T value_type;
ArrayPOD()
:capacity_(0)
,size_(0)
,data_(NULL)
{}
~ArrayPOD()
{
free(data_);
}
s32 size() const{ return size_;}
void clear(){ size_ = 0;}
void push_front(const value_type& x);
void push_back(const value_type& x);
void pop_front();
void pop_back();
const value_type& operator[](s32 index) const{ return data_[index];}
value_type& operator[](s32 index){ return data_[index];}
private:
ArrayPOD(const this_type&);
ArrayPOD& operator=(const this_type&);
void resize();
s32 capacity_;
s32 size_;
value_type* data_;
};
template<class T>
void ArrayPOD<T>::push_front(const value_type& x)
{
if(capacity_<=size_){
resize();
}
for(s32 i=size_; 1<=i; --i){
data_[i] = data_[i-1];
}
++size_;
data_[0] = x;
}
template<class T>
void ArrayPOD<T>::push_back(const value_type& x)
{
if(capacity_<=size_){
resize();
}
data_[size_] = x;
++size_;
}
template<class T>
void ArrayPOD<T>::pop_front()
{
if(size_<=0){
return;
}
for(s32 i=1; i<size_; ++i){
data_[i-1] = data_[i];
}
--size_;
}
template<class T>
void ArrayPOD<T>::pop_back()
{
if(size_<=0){
return;
}
--size_;
}
template<class T>
void ArrayPOD<T>::resize()
{
s32 capacity = size_ + 16;
value_type* data = (value_type*)malloc(sizeof(value_type)*capacity);
memcpy(data, data_, sizeof(value_type)*size_);
free(data_);
capacity_ = capacity;
data_ = data;
}
//------------------------------------------------------------
//---
//---
//---
//------------------------------------------------------------
template<class T>
class Node
{
public:
Node();
Node(const Node& node);
~Node();
void add(s32 child);
inline s32 getNumChildren() const;
inline s32 getChild(s32 index) const;
s32 getIndex() const{ return index_;}
void setIndex(s32 index){ index_ = index;}
const T& getLabel() const{ return data_;}
void setLabel(const T& data){ data_ = data;}
private:
Node& operator=(const Node&);
s32* children_;
s16 numCapacity_;
s16 numChildren_;
s32 index_;
T data_;
};
template<class T>
Node<T>::Node()
:children_(NULL)
,numCapacity_(0)
,numChildren_(0)
,index_(-1)
{}
template<class T>
Node<T>::Node(const Node& node)
:numCapacity_(node.numCapacity_)
,numChildren_(node.numChildren_)
{
children_ = (s32*)malloc(sizeof(s32)*numCapacity_);
memcpy(children_, node.children_, sizeof(s32)*numCapacity_);
index_ = node.index_;
data_ = node.data_;
}
template<class T>
Node<T>::~Node()
{
free(children_);
children_ = NULL;
}
template<class T>
void Node<T>::add(s32 child)
{
if(numCapacity_<=numChildren_){
s32 newCapacity = numCapacity_+4;
s32* children = (s32*)malloc(sizeof(s32)*newCapacity);
memcpy(children, children_, sizeof(s32)*numCapacity_);
free(children_);
numCapacity_ = newCapacity;
children_ = children;
}
children_[numChildren_] = child;
++numChildren_;
}
template<class T>
inline s32 Node<T>::getNumChildren() const
{
return numChildren_;
}
template<class T>
inline s32 Node<T>::getChild(s32 index) const
{
return children_[index];
}
//------------------------------------------------------------
//---
//---
//---
//------------------------------------------------------------
template<class T>
class Tree
{
public:
typedef Node<T> node_type;
Tree();
~Tree();
const node_type* getRoot() const;
node_type* getRoot();
node_type* add(node_type* parent);
const node_type* get(s32 index) const;
node_type* get(s32 index);
private:
Tree(const Tree&);
Tree& operator=(const Tree&);
node_type* addRoot();
void resize();
s32 capacity_;
s32 size_;
node_type* nodes_;
};
template<class T>
Tree<T>::Tree()
:capacity_(0)
,size_(0)
,nodes_(NULL)
{
addRoot();
}
template<class T>
Tree<T>::~Tree()
{
for(s32 i=0; i<capacity_; ++i){
nodes_[i].~node_type();
}
free(nodes_);
nodes_ = NULL;
}
template<class T>
typename Tree<T>::node_type* Tree<T>::addRoot()
{
if(0<size_){
return getRoot();
}
resize();
size_ = 1;
return &nodes_[0];
}
template<class T>
const typename Tree<T>::node_type* Tree<T>::getRoot() const
{
return (size_<=0)? NULL : &nodes_[0];
}
template<class T>
typename Tree<T>::node_type* Tree<T>::getRoot()
{
return (size_<=0)? NULL : &nodes_[0];
}
template<class T>
typename Tree<T>::node_type* Tree<T>::add(node_type* parent)
{
assert(NULL != parent);
if(capacity_<=size_){
resize();
}
parent->add(size_);
node_type* node = nodes_ + size_;
++size_;
return node;
}
template<class T>
const typename Tree<T>::node_type* Tree<T>::get(s32 index) const
{
assert(0<=index && index<size_);
return &nodes_[index];
}
template<class T>
typename Tree<T>::node_type* Tree<T>::get(s32 index)
{
assert(0<=index && index<size_);
return &nodes_[index];
}
template<class T>
void Tree<T>::resize()
{
s32 capacity = capacity_ + 16;
node_type* nodes = (node_type*)malloc(sizeof(node_type)*capacity);
for(s32 i=0; i<capacity_; ++i){
new(&nodes[i]) node_type(nodes_[i]);
nodes_[i].~node_type();
}
for(s32 i=capacity_; i<capacity; ++i){
new(&nodes[i]) node_type();
}
free(nodes_);
capacity_ = capacity;
nodes_ = nodes;
}
}

0 件のコメント:
コメントを投稿