deepin-ocr/3rdparty/ncnn/tools/pnnx/tests/test_nn_MultiheadAttention.py

# Tencent is pleased to support the open source community by making ncnn available.
#
# Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
import torch
import torch.nn as nn
import torch.nn.functional as F
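
# Exercises nn.MultiheadAttention with several head counts and bias/zero-attention
# options, in both the default (seq, batch, embed) input layout and, on torch >= 1.9,
# the batch_first (batch, seq, embed) layout.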
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()

        # default layout: inputs are (seq, batch, embed_dim=64)
        self.attention_0_0 = nn.MultiheadAttention(embed_dim=64, num_heads=4)
        self.attention_0_1 = nn.MultiheadAttention(embed_dim=64, num_heads=8, bias=False, add_bias_kv=False, add_zero_attn=False)
        self.attention_0_2 = nn.MultiheadAttention(embed_dim=64, num_heads=16, bias=True, add_bias_kv=True, add_zero_attn=True)

        # batch_first layout: inputs are (batch, seq, embed_dim=40), available on torch >= 1.9 only
        if torch.__version__ >= '1.9':
            self.attention_1_0 = nn.MultiheadAttention(embed_dim=40, num_heads=4, batch_first=True)
            self.attention_1_1 = nn.MultiheadAttention(embed_dim=40, num_heads=8, bias=False, add_bias_kv=False, add_zero_attn=False, batch_first=True)
            self.attention_1_2 = nn.MultiheadAttention(embed_dim=40, num_heads=10, bias=True, add_bias_kv=True, add_zero_attn=True, batch_first=True)
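
    # Each attention module returns (attn_output, attn_output_weights); all outputs
    # are returned so they can be compared against the pnnx results.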
    def forward(self, xq, xk, xv, yq, yk, yv):
        x0, x0w = self.attention_0_0(xq, xk, xv)
        x1, x1w = self.attention_0_1(xq, xk, xv)
        x2, x2w = self.attention_0_2(xq, xk, xv)
        if torch.__version__ < '1.9':
            return x0, x0w, x1, x1w, x2, x2w
        y0, y0w = self.attention_1_0(yq, yk, yv)
        y1, y1w = self.attention_1_1(yq, yk, yv)
        y2, y2w = self.attention_1_2(yq, yk, yv)
        return x0, x0w, x1, x1w, x2, x2w, y0, y0w, y1, y1w, y2, y2w
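
# Traces the model to TorchScript, converts it with pnnx, then checks that pnnx
# inference reproduces the PyTorch outputs exactly.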
def test():
    net = Model()
    net.eval()

    torch.manual_seed(0)
    # (seq=20, batch=1, embed=64) for the default-layout modules
    xq = torch.rand(20, 1, 64)
    xk = torch.rand(20, 1, 64)
    xv = torch.rand(20, 1, 64)
    # (batch=1, seq, embed=40) for the batch_first modules
    yq = torch.rand(1, 15, 40)
    yk = torch.rand(1, 24, 40)
    yv = torch.rand(1, 24, 40)

    a = net(xq, xk, xv, yq, yk, yv)

    # export torchscript
    mod = torch.jit.trace(net, (xq, xk, xv, yq, yk, yv))
    mod.save("test_nn_MultiheadAttention.pt")

    # torchscript to pnnx
    import os
    os.system("../src/pnnx test_nn_MultiheadAttention.pt inputshape=[20,1,64],[20,1,64],[20,1,64],[1,15,40],[1,24,40],[1,24,40]")

    # pnnx inference
    import test_nn_MultiheadAttention_pnnx
    b = test_nn_MultiheadAttention_pnnx.test_inference()

    for a0, b0 in zip(a, b):
        if not torch.equal(a0, b0):
            return False
    return True
if __name__ == "__main__":
    if test():
        exit(0)
    else:
        exit(1)