"""
A general immutable tree module. This module is used when parsing a serial sentence into a Tree
structure.
"""
from typing import Any, Callable, Iterator, Optional, Sequence, overload
class _TreeBuilder[T]:
"""
A TreeBuilder is a utility to create arbitrary, immutable Trees. TreeBuilder
works by traversing and creating a tree structure, and then providing an
immutable view of the Tree on build. A TreeBuilder has an internal cursor
on a Tree's node.
A TreeBuilder can be reused to create many trees. This is useful if many
similar Trees must be created. Note however, that if multiple Trees are
created from the same TreeBuilder, Tree nodes will be unique, but data on
the nodes will be shallow copies.
"""
def __init__(self) -> None:
"""
Creates a new empty TreeBuilder, with no internal data.
"""
# Unfortunately, these must remain at Any since mypy cannot infer the
# None check properly in _assert_initialization_status.
self.root: Any = None
self.current: Any = None
self.constructed: bool = False
def create_root(self, data: T) -> None:
"""
Creates the root of the tree. This is the first step in Tree building.
This should be called when a TreeBuilder is first created.
Args:
data: The optional data to put on the root node.
"""
self.root = Tree(data)
self.current = self.root
def move_to_parent(self) -> None:
"""
Move the internal cursor of the TreeBuilder to cursor location's parent.
Raises:
ValueError: If the TreeBuilder's root has not been initialized, or
if the builder is already at the root.
"""
self._assert_initialization_status()
if self.current is self.root:
raise ValueError("Currently at root. Cannot move to parent")
self.current = self.current.parent
def move_to_child(self, i: int) -> None:
"""
Move the internal cursor to the cursor location's i-th child.
Args:
i: The index of the child to move to.
Raises:
IndexError: If the child index is out of range.
ValueError: If the TreeBuilder's root has not been initialized.
"""
self._assert_initialization_status()
try:
self.current = self.current[i]
except IndexError as e:
raise IndexError(
f"{i}-th child is out of range. There are {len(self.current)} children on this node"
) from e
def move_to_root(self) -> None:
"""
Move the internal cursor to the root of the entire tree.
Raises:
ValueError: If the TreeBuilder's root has not been initialized.
"""
self._assert_initialization_status()
self.current = self.root
def set_data(self, data: T) -> None:
"""
Sets the data for the cursor location's node.
Args:
data: The data to place on the current cursor location's node.
Raises:
ValueError: If the TreeBuilder's root has not been initialized.
"""
self._assert_initialization_status()
self._copy_if_necessary()
self.current._data = data
def remove_child(self, i: int) -> None:
"""
Remove the i-th child from the cursor location.
Args:
i: The index of the child to remove.
Raises:
IndexError: If the child is out of range.
ValueError: If the TreeBuilder's root has not been initialized.
"""
self._assert_initialization_status()
self._copy_if_necessary()
try:
del self.current._children[i]
except IndexError as e:
raise IndexError(
f"{i}-th child is out of range. There are {len(self.current)} children on this node"
) from e
def add_child(self, data: T, move: bool = False) -> None:
"""
Adds a child to the current cursor location's node.
Children indices are directly related to the order of their creation.
Args:
data: The data to put on the created child.
move: Flag to indicate if the cursor should move to this child.
Raises:
ValueError: If the TreeBuilder's root has not been initialized.
"""
self._assert_initialization_status()
self._copy_if_necessary()
child: Tree[T] = Tree(data)
child._parent = self.current
self.current._children.append(child)
if move:
l = len(self.current)
self.move_to_child(l - 1)
def build(self) -> Tree[T]:
"""
Provides an immutable reference to the constructed tree.
Returns:
The constructed Tree reference.
Raises:
ValueError: If the TreeBuilder's root has not been initialized.
"""
self._assert_initialization_status()
self.constructed = True
return self.root
def _copy_if_necessary(self) -> None:
"""
Checks if a copy is necessary because the Tree has been built.
"""
if self.constructed:
self._copy()
self.constructed = False
def _copy(self) -> None:
"""
Internal method to copy the constructed Tree in memory.
"""
new_root: Tree[T] = Tree(self.root.data)
new_current = None
if self.current is self.root:
new_current = new_root
queue = [(new_root, self.root._children)]
while queue:
new_parent, children = queue.pop()
new_children = []
for child in children:
new_child: Tree[T] = Tree(child.data)
new_child._parent = new_parent
new_children.append(new_child)
queue.append((new_child, child._children))
if self.current is child:
new_current = new_child
new_parent._children = new_children
self.root = new_root
self.current = new_current
def _assert_initialization_status(self) -> None:
"""
Asserts the initialization invariant on the root of this builder.
Raises:
ValueError: If the TreeBuilder's root has not been initialized.
"""
if self.root is None:
raise ValueError("The TreeBuilder has not created a root for the Tree yet")
[docs]
class Tree[T]:
"""
A tree node. This is the base representation for a tree, which can have many
children which are accessible via child index. The tree's structure is
immutable, so the data, parent, children cannot be changed once created.
As is this class is useless, and must be created with the TreeBuilder
module which is a sort of friend class of Tree to maintain its immutable
public contract.
"""
[docs]
def __init__(self, data: T) -> None:
"""
Create a tree holding the value. Create a larger Tree, with TreeBuilder.
Args:
data: The data to put with the Tree node.
"""
self._data: T = data
self._parent: Optional["Tree[T]"] = None
self._children: list["Tree[T]"] = []
@property
def data(self) -> T:
"""
The data on the tree node. The property ensures it is readonly.
Returns:
The data stored on the Tree.
"""
return self._data
@property
def parent(self) -> Optional["Tree[T]"]:
"""
Provides the parent of the Tree. The property ensures it is readonly.
Returns:
A pointer to the parent Tree reference. None if there is no parent.
"""
return self._parent
@overload
def __getitem__(self, key: int) -> "Tree[T]": ...
@overload
def __getitem__(self, key: slice) -> list["Tree[T]"]: ...
[docs]
def __getitem__(self, key):
"""
Get specific children from the Tree. This can be an integer or slice.
Args:
key: The indexer for the item.
"""
return self._children[key]
[docs]
def __iter__(self) -> Iterator["Tree[T]"]:
"""
Provides an iterator over the children.
"""
yield from self._children
[docs]
def __len__(self) -> int:
"""
Provides the number of direct children on the tree.
Returns:
The number of direct children on the tree.
"""
return len(self._children)
def _create_tree_helper[K, I](
builder: _TreeBuilder, node: K, to_id: Callable[[K], I], children_tokens: dict[I, list[K]]
) -> None:
"""
Method to help create a tree from a sentence given the root token.
Args:
builder: The TreeBuilder currently being used to create the Tree.
node: The current token we are constructing the tree at.
to_id: Callable that transforms a token into its id.
children_tokens: A dictionary from token id to children tokens.
Returns:
A Tree constructed given the sentence structure.
"""
try:
tokens = children_tokens[to_id(node)]
except KeyError:
tokens = []
for token in tokens:
builder.add_child(data=token, move=True)
_create_tree_helper(builder, token, to_id, children_tokens)
builder.move_to_parent()
[docs]
def from_tokens[K, I](
tokens: Sequence[K],
root_id: I,
to_id: Callable[[K], I],
to_head: Callable[[K], I],
skip: Optional[Callable[[K], bool]] = None,
) -> Tree[K]:
"""
The completely generic function to create a Tree structure for a sequence of Tokens.
This can be used for tokens other than the pre-defined CoNLL-U schema.
Args:
tokens: The tokens to create the tree from.
root_id: The root token of the tree will be a child of this id.
to_id: The mapper from the token to its id.
to_head: The mapper from the token to the id of its parent.
skip: The optional guard to skip certain tokens that may not participate in the Tree
structure.
"""
children_tokens: dict[I, list[K]] = {}
for token in tokens:
if skip is not None and skip(token):
continue
h = to_head(token)
try:
children_tokens[h].append(token)
except KeyError:
children_tokens[h] = [token]
builder: _TreeBuilder[K] = _TreeBuilder()
starters = children_tokens.get(root_id)
if starters is None:
raise ValueError("The current sentence has no root token.")
if len(starters) != 1:
raise ValueError("There should be exactly one root token in a sentence.")
root_token = starters[0]
builder.create_root(root_token)
_create_tree_helper(builder, root_token, to_id, children_tokens)
root = builder.build()
return root