Sparse Matrix Builder赋值失败

在调用 ti.kernelsparse matrix builder 填充的时候,程序报错

***********************************
* Taichi Compiler Stack Traceback *
***********************************
/home/yiliao/anaconda3/envs/py3.8/lib/python3.8/site-packages/taichi/_lib/core/taichi_python.cpython-38-x86_64-linux-gnu.so(+0x46f6f7d) [0x7fcb94006f7d]
/home/yiliao/anaconda3/envs/py3.8/lib/python3.8/site-packages/taichi/_lib/core/taichi_python.cpython-38-x86_64-linux-gnu.so(+0x1b1c5ff) [0x7fcb9142c5ff]
/lib/x86_64-linux-gnu/libc.so.6(+0x43090) [0x7fcbd2621090]
[0x7fcaec8f6e5d]

Internal error occurred. Check out this page for possible solutions:
https:docs.taichi-lang.org/docs/install
[E 07/27/23 17:09:47.936 1548753] Received signal 11 (Segmentation fault)

Process finished with exit code 255

源代码如下:

    @ti.kernel
    def precomputation(self, lhs_t:ti.types.sparse_matrix_builder()):
        ELE_NUM = self.ELE_NUM
        dim = self.dim

        for i in range(self. NODE_NUM):
            for d in ti.static(range(2)):
                lhs_t[i*dim + d, i*dim + d] += self.node_mass[i]/self.dt**2

        """ 这一段与错误无关
        for i in range(self. ELE_NUM):
            B_i = self. B[i]
            a = B_i[0, 0]
            b = B_i[0, 1]
            c = B_i[1, 0]
            d = B_i[1, 1]

            for t in range(2):
                self.A[t*ELE_NUM + i][0, 0] = -a - c
                self.A[t*ELE_NUM + i][0, 2] = a
                self.A[t*ELE_NUM + i][0, 4] = c
                self.A[t*ELE_NUM + i][1, 0] = -b - d
                self.A[t*ELE_NUM + i][1, 2] = b
                self.A[t*ELE_NUM + i][1, 4] = d
                self.A[t*ELE_NUM + i][2, 1] = -a - c
                self.A[t*ELE_NUM + i][2, 3] = a
                self.A[t*ELE_NUM + i][2, 5] = c
                self.A[t*ELE_NUM + i][3, 1] = -b - d
                self.A[t*ELE_NUM + i][3, 3] = b
                self.A[t*ELE_NUM + i][3, 5] = d
        """

        for ele_idx in range(self. ELE_NUM):
            ia, ib, ic = self.element[ele_idx]
            ia_x, ia_y = it's * dim, it's * dim + 1
            ib_x, ib_y = ib * dim, ib * dim + 1
            ic_x, ic_y = ic * dim, ic * dim + 1
            q_idx_vec = Tue. Vector([ia_x, ia_y, ib_x, ib_y, ic_x, ic_y])
            for t in range(2):
                A_i = self. A[t*ELE_NUM + ele_idx]
                for A_row_idx, A_col_idx in ti.static(ti.ndrange(6,6)):
                    lhs_row_idx = q_idx_vec[A_row_idx]
                    lhs_col_idx = q_idx_vec[A_col_idx]
                    matrix_temp = ti.f64(0.)
                    for idx in range(dim**2):
                        weight = ti.f64(0.)
                        if t == 0:
                            weight = self.strain_weight[ele_idx]
                        else:
                            weight = self.vol_weight[ele_idx]
                        lhs_t[lhs_row_idx, lhs_col_idx] += weight * A_i[idx, A_row_idx] * A_i[idx, A_col_idx]

在偶然更改之后,发现将最后的循环中将 sparse matrix 放在循环外面可以解决问题,更改代码如下:

                    ...
                    for idx in range(dim**2):
                        weight = ti.f64(0.)
                        if t == 0:
                            weight = self.strain_weight[ele_idx]
                        else:
                            weight = self.vol_weight[ele_idx]
                        matrix_temp += weight * A_i[idx, A_row_idx] * A_i[idx, A_col_idx]
                    lhs_t[lhs_row_idx, lhs_col_idx] += matrix_temp

这样的更改结果是:不会出现在循环中对 sparse matrix 的某个元素重复操作。
我想知道为什么会导致这样的结果,这是否是一个 bug ?

hello @houkensjtu ,可以麻烦你帮忙看一下吗?感谢!

应该说对于 SparseMatrix 的某个元素进行 += 重复赋值操作是没有问题的。我觉得结合你的错误信息是 [E 07/27/23 17:09:47.936 1548753] Received signal 11 (Segmentation fault),比较可能的原因在于你在循环中使用了 ti.static 进行展开,所以导致展开的代码太长了。由于没有上下文所以我这里也不太好复现,你是不是可以试试下面两件事:

  1. 尝试去掉 ti.static
  2. 把计算中和计算规模相关的常数减小一点

再看看是否还是有同样的报错?

谢谢您的回答,但是我刚刚试了一下,还是有这个问题,我会把完整代码贴出来,但是需要时间删掉一些不必要的部分,感谢!

1 个赞

我们的同事提醒了我,有另一个可能的问题是 max_num_triplets 爆了,在填充矩阵的时候一定要注意非零元素个数不能超过这个参数值,如果不确定就干脆填写 N*N 来测试一下

您好,我把完整的代码贴上来了,我发现改为 arch=ti.gpu 不会报错,但是 cpu 会报错。

import taichi as ti
ti.init(arch=ti.cpu, default_fp=ti.f32, debug=True)
import taichi.math as tm
import numpy as np


@ti.data_oriented
class PDTest():
    def __init__(self):
        self.dim = 2
        self.dt = 1./60
        self.rho = 1.0
        self.E = 5.e5
        self.nu = 0.4
        self.mu, self.lam = self.E / (2 * (1 + self.nu)), self.E * self.nu / ((1 + self.nu) * (1 - 2 * self.nu))
        self.positional_mass = 1.e6
        self.grasp_mass = 1.e6
        self.solve_itr = 10
        self.GRASP_VEL = ti.Vector.field(2, dtype=ti.f64, shape=1)
        self.GRASP_VEL[0] = ti.Vector([0.0, 0.0])

        self.node_pos_init = ti.Vector.field(2, dtype=ti.f64, shape=4)
        self.node_pos = ti.Vector.field(2, dtype=ti.f64, shape=4, needs_grad=True)
        self.node_pos_new = ti.Vector.field(2, dtype=ti.f64, shape=4)
        self.node_mass = ti.field(dtype=ti.f64, shape=4)
        self.node_vel = ti.Vector.field(2, dtype=ti.f64, shape=4)
        self.NODE_NUM = 4

        self.edge = ti.Vector.field(2, dtype=ti.i32, shape=5)
        self.EDGE_NUM = 5

        self.element = ti.Vector.field(3, dtype=ti.i32, shape=2)
        self.ele_vol = ti.field(dtype=ti.f64, shape=2)
        self.strain_weight = ti.field(dtype=ti.f64, shape=2)
        self.vol_weight = ti.field(dtype=ti.f64, shape=2)
        self.ELE_NUM = 2

        self.B = ti.Matrix.field(2, 2, dtype=ti.f64, shape=self.ELE_NUM)
        self.F = ti.Matrix.field(2, 2, dtype=ti.f64, shape=self.ELE_NUM)
        self.A = ti.Matrix.field(4, 6, dtype=ti.f64, shape=2*self.ELE_NUM)
        self.Bp = ti.Matrix.field(2, 2, dtype=ti.f64, shape=2*self.ELE_NUM)

        self.sn = ti.field(dtype=ti.f64, shape=self.NODE_NUM*2)
        self.lhs = ti.field(ti.f64, shape=(2*self.NODE_NUM, 2*self.NODE_NUM))
        self.rhs = ti.field(ti.f64, shape=2*self.NODE_NUM)

        self.lhs_t_builder = ti.linalg.SparseMatrixBuilder(2*self.NODE_NUM, 2*self.NODE_NUM, max_num_triplets=100)
        self.rhs_t = ti.ndarray(dtype=ti.f32, shape=2*self.NODE_NUM)
        self.sn_t = ti.ndarray(dtype=ti.f32, shape=2*self.NODE_NUM)

        self.assign_variable()
        self.construct_B()
        self.construct_volume()


    def assign_variable(self):
        node_pos_np = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 1.0], [1.0, 0.0]])
        node_mass_np = np.array([1.0, 1.0, 1.0, 1.0])
        self.node_pos_init.from_numpy(node_pos_np)
        self.node_pos.from_numpy(node_pos_np)
        self.node_mass.from_numpy(node_mass_np)

        edge_np = np.array([[0, 1], [1, 2], [2, 3], [3, 0], [0, 2]])
        self.edge.from_numpy(edge_np)
        self.edge_np = edge_np

        element_np = np.array([[0, 1, 2], [0, 2, 3]])
        self.element.from_numpy(element_np)

        self.fix_node_list = [0, 1]
        self.grasp_node_list = [2]


    @ti.kernel
    def construct_B(self):
        for i in range(self.ELE_NUM):
            ia, ib, ic = self.element[i]
            pa, pb, pc = self.node_pos_init[ia], self.node_pos_init[ib], self.node_pos_init[ic]
            B_i_inv = ti.Matrix.cols([pb - pa, pc - pa])
            self.B[i] = B_i_inv.inverse()


    @ti.kernel
    def construct_volume(self):
        for i in range(self.ELE_NUM):
            ia, ib, ic = self.element[i]
            pa, pb, pc = self.node_pos_init[ia], self.node_pos_init[ib], self.node_pos_init[ic]
            ele_volume = abs((pb - pa).cross(pc - pa)) / 2.0
            self.ele_vol[i] = ele_volume
            self.strain_weight[i] = self.mu * 2 * ele_volume
            self.vol_weight[i] = self.lam * self.dim * ele_volume


    @ti.kernel
    def precomputation(self, lhs_t:ti.types.sparse_matrix_builder()):
        ELE_NUM = self.ELE_NUM
        dim = self.dim

        for i in range(self.NODE_NUM):
            for d in ti.static(range(2)):
                lhs_t[i*dim + d, i*dim + d] += self.node_mass[i]/self.dt**2

        for i in range(self.ELE_NUM):
            B_i = self.B[i]
            a = B_i[0, 0]
            b = B_i[0, 1]
            c = B_i[1, 0]
            d = B_i[1, 1]

            for t in range(2):
                self.A[t*ELE_NUM + i][0, 0] = -a - c
                self.A[t*ELE_NUM + i][0, 2] = a
                self.A[t*ELE_NUM + i][0, 4] = c
                self.A[t*ELE_NUM + i][1, 0] = -b - d
                self.A[t*ELE_NUM + i][1, 2] = b
                self.A[t*ELE_NUM + i][1, 4] = d
                self.A[t*ELE_NUM + i][2, 1] = -a - c
                self.A[t*ELE_NUM + i][2, 3] = a
                self.A[t*ELE_NUM + i][2, 5] = c
                self.A[t*ELE_NUM + i][3, 1] = -b - d
                self.A[t*ELE_NUM + i][3, 3] = b
                self.A[t*ELE_NUM + i][3, 5] = d

        for ele_idx in range(self.ELE_NUM):
            ia, ib, ic = self.element[ele_idx]
            ia_x, ia_y = ia * dim, ia * dim + 1
            ib_x, ib_y = ib * dim, ib * dim + 1
            ic_x, ic_y = ic * dim, ic * dim + 1
            q_idx_vec = ti.Vector([ia_x, ia_y, ib_x, ib_y, ic_x, ic_y])
            for t in range(2):
                A_i = self.A[t*ELE_NUM + ele_idx]
                # for A_row_idx, A_col_idx in ti.static(ti.ndrange(6,6)):
                for A_row_idx, A_col_idx in ti.ndrange(6, 6):
                    lhs_row_idx = q_idx_vec[A_row_idx]
                    lhs_col_idx = q_idx_vec[A_col_idx]
                    matrix_temp = ti.f64(0.)
                    for idx in range(dim**2):
                        weight = ti.f64(0.)
                        if t == 0:
                            weight = self.strain_weight[ele_idx]
                        else:
                            weight = self.vol_weight[ele_idx]
                        matrix_temp += weight * A_i[idx, A_row_idx] * A_i[idx, A_col_idx]
                        lhs_t[lhs_row_idx, lhs_col_idx] += \
                            weight * A_i[idx, A_row_idx] * A_i[idx, A_col_idx]
                    # print('Matrix Index:', lhs_row_idx, lhs_col_idx)
                    # lhs_t[lhs_row_idx, lhs_col_idx] += matrix_temp


def main():
    class AutoDiffPD(PDTest):
        def __init__(self):
            super().__init__()
            self.my_loss = ti.field(dtype=ti.f64, shape=(), needs_grad=True)

    test = AutoDiffPD()

    test.precomputation(test.lhs_t_builder)

    test.lhs_t_builder.print_triplets()
    test.lhs_t = test.lhs_t_builder.build()
    test.solver = ti.linalg.SparseSolver(solver_type="LLT")
    test.solver.analyze_pattern(test.lhs_t)
    test.solver.factorize(test.lhs_t)


if __name__ == '__main__':
    main()

关于 max_num_triplets 我还有疑问,使用 print_triplets() 输出的 num_triplets 通常大于 n*n,请问是否 max_num_triplets 也需要根据这个进行调整。