## 0x0. 前言

本文缘起于我在知乎上的一个回答，原贴见：https://www.zhihu.com/question/502301777/answer/2248950419 。回答提到了去年在OneFlow开发一些算子时，基于算子AutoTest框架找到了一些PyTorch算子的bug，并给PyTorch做出了反馈或修复。但这个回答没有介绍这个AutoTest框架长什么样子，以及它背后的原理。因此，这篇文章就用来介绍OneFlow的算子AutoTest框架，看一下OneFlow深度学习框架在算子开发过程中是如何优雅地做算子对齐任务的（由@大缺弦 开发，后经我和其它同事进行扩展和丰富功能形成今天的形态）。这个AutoTest框架也可以很轻易移植到其它深度学习训练框架使用，代码实现在`https://github.com/Oneflow-Inc/oneflow/blob/v0.6.0/python/oneflow/test_utils/automated_test_util/torch_flow_dual_object.py`

## 0x1. 传统的算子对齐方式

``````import torch
import numpy as np
import oneflow as flow
for N in range(1, 5):
for C_in in range(1, 10):
for L_in in range(1, 10):
for H_in in range(1, 10):
for C_out in range(1, 10):
for Ksize in range(1, 10):
for Dilation in range(1, 10):
for Stride in range(1, min(L_in, H_in)):
for OutPad in range(1, min(Dilation, Stride)):
try:
torch_input = torch.randn(N, C_in, L_in, H_in)
flow_input = flow.tensor(torch_input.numpy())
torch_out = torch_m(torch_input)
flow_out = flow_m(flow_input)
torch_out = torch_out.sum()
flow_out = flow_out.sum()
assert(np.allclose(torch_out.detach().numpy(), flow_out.detach().numpy(), 1e-06, 1e-06)), "forward not equal"
torch_out.backward()
flow_out.backward()
except Exception as e:
print('Input Param Error')
``````

AutoTest框架中随机数据的生成逻辑实现在`https://github.com/Oneflow-Inc/oneflow/blob/v0.6.0/python/oneflow/test_utils/automated_test_util/generators.py`
中。并且这个AutoTest框架可以轻易移植到其它任何深度学习框架去做算子对齐任务。

## 0x2. 算子AutoTest框架用法

``````@autotest()
def test_deconv2d_with_random_data(test_case):
channels = random(1, 6)
m = torch.nn.ConvTranspose2d(
in_channels=channels,
out_channels=random(1, 20),
kernel_size=random(1, 4),
stride=random() | nothing(),
dilation=random(1, 5) | nothing(),
groups=random(1, 5) | nothing(),
)
m.train(random())
device = random_device()
m.to(device)
x = random_pytorch_tensor(ndim=4, dim1=channels).to(device)
y = m(x)
return y
``````

`````` @autotest()
def test_flow_matmul_with_random_data(test_case):
k = random(1, 6)
x = random_pytorch_tensor(ndim=2, dim1=k)
y = random_pytorch_tensor(ndim=2, dim0=k)
z = torch.matmul(x, y)
return z
``````

上面的测试随机生成了两个张量`x`和`y`，它们的维度分别是`[m, k]`和`[k, n]`，这些维度的值都是随机生成的。

## 0x3.1 如何产生随机数据？

``````__all__ = [
"random_tensor",
"random_bool",
"random_device",
"random",
"random_or_nothing",
"oneof",
"constant",
"nothing"
]
``````

这些随机生成的参数既可以是内置的基础数据类型，也可以是自定义数据类型比如`tensor`。AutoTest框架所有参数的随机性都是基于这些方法来做到的，我们看一下`generator`基类的实现：

``````class generator:
def __init__(self, children):
self.children = children
self._value = None
def _init(self):
self._value = None
for x in self.children:
x._init()
def eval(self):
self._init()
return self.value()
def _calc_value(self):
raise NotImplementedError()
def value(self):
if self._value is None:
self._value = self._calc_value()
return self._value
def size(self):
return 1
def __or__(self, other):
other = pack(other)
return oneof(
self, other, possibility=self.size() / (self.size() + other.size())
)
def __ror__(self, other):
return self | other
return self + other
def __sub__(self, other):
return self + neg(other)
def __rsub__(self, other):
return neg(self - other)
def __mul__(self, other):
return mul(self, other)
def __rmul__(self, other):
return self * other
def to(self, annotation):
self._to(annotation)
for x in self.children:
x.to(annotation)
return self
def _to(self, annotation):
pass
``````

`generator`基类提供了`value`和`eval`这两个公共接口；子类则需要按需重写`_calc_value`、`size`和`_to`等方法来定制随机值的计算方式。

``````class Nothing:
pass
class nothing(generator):
def __init__(self):
super().__init__([])
def _calc_value(self):
return Nothing()
``````

``````class random(generator):
def __init__(self, low=1, high=6):
self.low = pack(low)
self.high = pack(high)
super().__init__([self.low, self.high])
self.annotation = None
def _to(self, annotation):
if self.annotation is not None:
return
if hasattr(annotation, "__origin__"):
# PyTorch _size_2_t and similar types are defined by type variables,
# leading to unexpected __args__ and __origin__
#
# >>> _size_2_t = Union[T, Tuple[T, T]][int]
# >>> _size_2_t.__origin__
# typing.Union[~T, typing.Tuple[~T, ~T]]
#
# So recreate a new annotation object by repr and eval
#
# >>> _size_2_t
# typing.Union[int, typing.Tuple[int, int]]
# >>> _size_2_t_new = eval(repr(annotation))
# >>> _size_2_t_new.__origin__
# typing.Union
annotation = eval(repr(annotation))
self.annotation = annotation
def _generate(self, annotation):
if hasattr(annotation, "__origin__"):
if annotation.__origin__ is Union:
x = random_util.choice(annotation.__args__)
return self._generate(x)
if annotation.__origin__ is Tuple or annotation.__origin__ is py_tuple:
return [self._generate(x) for x in annotation.__args__]
else:
raise NotImplementedError(
f"Not implemented annotation {annotation} in random, type(annotation.__origin__) is {type(annotation.__origin__)}"
)
low, high = self.low.value(), self.high.value()
if annotation == int:
val = int(rng.integers(low, high))
elif annotation == float:
val = float(rng.random() * (high - low) + low)
elif annotation == bool:
val = random_util.choice([True, False])
else:
raise NotImplementedError(
f"Not implemented annotation {annotation} in random"
)
return val
def _calc_value(self):
return self._generate(self.annotation)
def random_or_nothing(low, high):
return oneof(random(low, high), nothing(), possibility=2 / 3)
``````

## 0x3.2 AutoTest核心实现

AutoTest框架的核心实现在`https://github.com/Oneflow-Inc/oneflow/blob/v0.6.0/python/oneflow/test_utils/automated_test_util/torch_flow_dual_object.py`

``````torch = GetDualObject("", torch_original, flow)  # module-level dual wrapper: attribute access fans out to both frameworks
# Only these two names form the public API of torch_flow_dual_object.py.
__all__ = ["autotest", "random_pytorch_tensor"]
``````

``````class DualObject:
def __init__(self, name, pytorch, oneflow):
self.name = name
self.pytorch = pytorch
self.oneflow = oneflow
if isinstance(pytorch, torch_original.nn.Module):
state_dict = pytorch.state_dict()
state_dict = {k: v.detach().cpu().numpy() for (k, v) in state_dict.items()}
if testing:
dual_modules_to_test.append(self)
if isinstance(pytorch, torch_original.Tensor):
if testing:
dual_objects_to_test.append(self)
def __repr__(self):
return f"PyTorch object:
{self.pytorch}
OneFlow object:
{self.oneflow}"
def __getattr__(self, key):
pytorch_attr = getattr(self.pytorch, key)
oneflow_attr = getattr(self.oneflow, key)
new_name = f"{self.name}.{key}"
global call_pytorch
call_pytorch = self.pytorch
return GetDualObject(new_name, pytorch_attr, oneflow_attr)
``````

在`DualObject`的`__init__`中会同时保存PyTorch和OneFlow两个对象；而在导出`random_pytorch_tensor`方法时，会基于随机生成的PyTorch张量构造对应的`oneflow_tensor`。这里不妨先看一下`random_pytorch_tensor`的实现：

``````def random_pytorch_tensor(
ndim=None,
dim0=1,
dim1=None,
dim2=None,
dim3=None,
dim4=None,
low=0,
high=1,
dtype=float,
):
pytorch_tensor = (
random_tensor(ndim, dim0, dim1, dim2, dim3, dim4, low, high, dtype)
.value()
)
flow_tensor = flow.tensor(
pytorch_tensor.detach().cpu().numpy(),
)
return GetDualObject("unused", pytorch_tensor, flow_tensor)
``````

构造出的`DualObject`对象会被加入`dual_objects_to_test`列表，供后续的自动对齐检查使用。

``````def __getattr__(self, key):
pytorch_attr = getattr(self.pytorch, key)
oneflow_attr = getattr(self.oneflow, key)
print(key)
# print(pytorch_attr)
# print(oneflow_attr)
new_name = f"{self.name}.{key}"
return GetDualObject(new_name, pytorch_attr, oneflow_attr)
# flatten的AutoTest程序
@autotest(auto_backward=False)
def test_against_pytorch(test_case):
m = torch.nn.Flatten(
start_dim=random(1, 6) | nothing(), end_dim=random(1, 6) | nothing()
)
m.train(random())
device = random_device()
m.to(device)
x = random_pytorch_tensor().to(device)
y = m(x)
return y
``````

``````nn
Flatten
train
to
to
``````

``````PyTorch object:
<bound method Module.train of Flatten(start_dim=1, end_dim=-1)>
OneFlow object:
<bound method Module.train of Flatten(start_dim=1, end_dim=-1)>
``````

`GetDualObject`对nn.Module和其它函数/方法的处理方式并不相同（后者通过在生成的对象上重写`__call__`来拦截并转发调用）。下面再来看`GetDualObject`是如何配合`autotest`装饰器完成整个测试流程的：

``````def autotest(
n=20,
auto_backward=True,
rtol=0.0001,
atol=1e-05,
check_graph=True,
check_allclose=True,
):
verbose = os.getenv("ONEFLOW_TEST_VERBOSE") is not None
def deco(f):
@functools.wraps(f)
def new_f(test_case):
nonlocal n
loop_limit = n * 20
loop = 0
while n > 0:
clear_note_fake_program()
if loop > loop_limit:
raise ValueError("autotest stuck in an endless loop!")
dual_modules_to_test.clear()
dual_objects_to_test.clear()
try:
global testing
testing = True
global testing_graph
if check_graph:
testing_graph = True
res = f(test_case)
testing = False
testing_graph = False
except (PyTorchDoesNotSupportError, BothDoNotSupportError) as e:
if verbose:
print(f"{f.__name__}")
print(e)
loop += 1
continue
if res is not None:
if not isinstance(res, collections.abc.Sequence):
res = [res]
func_outputs = res
for x in res:
if auto_backward:
if isinstance(x.pytorch, torch_original.Tensor):
call_tensor_id.append(id(x.pytorch))
x.sum().backward()
dual_objects_to_test.append(x)
for x in dual_modules_to_test:
for key in x.pytorch.state_dict().keys():
if key not in x.oneflow.state_dict().keys():
warnings.warn(f"oneflow module don't have `{key}`")
continue
vis_parameters[key] = x.pytorch.state_dict()[key]
dual_objects_to_test.append(
GetDualObject(
"unused",
getattr(x.pytorch, key),
getattr(x.oneflow, key),
)
)
call_tensor_id.append(id(getattr(x.pytorch, key)))
dual_objects_to_test.append(
GetDualObject(
"unused",
)
)
for x in dual_objects_to_test:
if (
isinstance(x.pytorch, torch_original.Tensor)
and id(x.pytorch) not in call_tensor_id
):
vis_tensor.append(x.pytorch)
# check eager
for x in dual_objects_to_test:
if check_allclose:
test_case.assertTrue(check_equality(x, rtol=rtol, atol=atol), x)
if verbose:
print(f"{f.__name__} test eager passed.")
n -= 1
loop += 1
return new_f
return deco
``````

## 0x6. 相关链接

https://github.com/Oneflow-Inc/oneflow

https://github.com/pytorch/pytorch