import random
def reverse_dataset(train=False,
                    dev=False,
                    test=False,
                    train_rows=10000,
                    dev_rows=1000,
                    test_rows=1000,
                    seq_max_length=10):
    """
    Load the Reverse dataset.

    The Reverse dataset is a simple task of reversing a list of numbers. This dataset is useful
    for testing implementations of sequence to sequence models.

    Every row is a ``dict`` with two keys: ``'source'``, a space-separated string of random
    digits (``0`` to ``9``) whose length is drawn uniformly from ``1`` to ``seq_max_length``
    inclusive, and ``'target'``, the same digits in reverse order.

    Rows are drawn from the module-level :mod:`random` generator, so seeding it beforehand
    makes the output reproducible.

    Args:
        train (bool, optional): If to load the training split of the dataset.
        dev (bool, optional): If to load the development split of the dataset.
        test (bool, optional): If to load the test split of the dataset.
        train_rows (int, optional): Number of training rows to generate.
        dev_rows (int, optional): Number of development rows to generate.
        test_rows (int, optional): Number of test rows to generate.
        seq_max_length (int, optional): Maximum sequence length.

    Returns:
        :class:`tuple` of :class:`list` or :class:`list`:
        Returns between one and all dataset splits (train, dev and test) depending on if their
        respective boolean argument is ``True``. A single requested split is returned as a bare
        list of rows; several requested splits are returned as a tuple in (train, dev, test)
        order; if no split is requested an empty tuple is returned.

    Example:
        >>> from torchnlp.random import set_seed
        >>> set_seed(321)
        >>>
        >>> from torchnlp.datasets import reverse_dataset
        >>> train = reverse_dataset(train=True)
        >>> train[0:1]
        [{'source': '6 2 5 8 7', 'target': '7 8 5 2 6'}]
    """
    ret = []
    for is_requested, n_rows in [(train, train_rows), (dev, dev_rows), (test, test_rows)]:
        rows = []
        for _ in range(n_rows):
            # Draw the length first, then the digits, so the order of random calls is fixed.
            length = random.randint(1, seq_max_length)
            seq = [str(random.randint(0, 9)) for _digit in range(length)]
            rows.append({'source': ' '.join(seq), 'target': ' '.join(reversed(seq))})

        # NOTE: The `random` generator is deterministic for a given seed, so every split is
        # generated in order (train, dev, test) even when it was not requested. Otherwise,
        # `reverse_dataset(train=True)` and `reverse_dataset(test=True)` would start from the
        # same generator state and share their leading rows.
        if not is_requested:
            continue

        ret.append(rows)

    # A single requested split is returned unwrapped; otherwise a tuple (possibly empty).
    if len(ret) == 1:
        return ret[0]
    return tuple(ret)