请问Addition overflow detected in是什么意思

打开debug=True之后,在运行’PrefixSumExecutor.run(var)'出现Addition overflow detected in是什么意思

import taichi as ti
import math
import os

ti.init(arch=ti.gpu,debug=True)
vec = ti.math.vec2

SAVE_FRAMES = False

window_size = 1024  # Number of pixels of the window
n = 40000  # Number of grains

density = 100.0
stiffness = 8e3
restitution_coef = 0.001
gravity = -9.81
dt = 0.0001  # Larger dt might lead to unstable results.
substeps = 60


@ti.dataclass
class Grain:
    p: vec  # Position
    m: ti.f32  # Mass
    r: ti.f32  # Radius
    v: vec  # Velocity
    a: vec  # Acceleration
    f: vec  # Force


gf = Grain.field(shape=(n, ))

grain_r_min = 0.001
grain_r_max = 0.002

grid_size = 2*grain_r_max  # Simulation domain of size [0, 1]
grid_n = ti.ceil(1.0/grain_r_max)



@ti.kernel
def init():
    for i in gf:
        # Spread grains in a restricted area.
        l = i * grid_size
        padding = 0.1
        region_width = 1.0 - padding * 2
        pos = vec(l % region_width + padding + grid_size * ti.random() * 0.2,
                  l // region_width * grid_size + 0.3)
        gf[i].p = pos
        gf[i].r = ti.random() * (grain_r_max - grain_r_min) + grain_r_min
        gf[i].m = density * math.pi * gf[i].r**2


@ti.kernel
def update():
    for i in gf:
        a = gf[i].f / gf[i].m
        gf[i].v += (gf[i].a + a) * dt / 2.0
        gf[i].p += gf[i].v * dt + 0.5 * a * dt**2
        gf[i].a = a


@ti.kernel
def apply_bc():
    bounce_coef = 0.3  # Velocity damping
    for i in gf:
        x = gf[i].p[0]
        y = gf[i].p[1]

        if y - gf[i].r < 0:
            gf[i].p[1] = gf[i].r
            gf[i].v[1] *= -bounce_coef

        elif y + gf[i].r > 1.0:
            gf[i].p[1] = 1.0 - gf[i].r
            gf[i].v[1] *= -bounce_coef

        if x - gf[i].r < 0:
            gf[i].p[0] = gf[i].r
            gf[i].v[0] *= -bounce_coef

        elif x + gf[i].r > 1.0:
            gf[i].p[0] = 1.0 - gf[i].r
            gf[i].v[0] *= -bounce_coef


@ti.func
def resolve(i, j):
    rel_pos = gf[j].p - gf[i].p
    dist = ti.sqrt(rel_pos[0]**2 + rel_pos[1]**2)
    delta = -dist + gf[i].r + gf[j].r  # delta = d - 2 * r
    if delta > 0:  # in contact
        normal = rel_pos / dist
        f1 = normal * delta * stiffness
        # Damping force
        M = (gf[i].m * gf[j].m) / (gf[i].m + gf[j].m)
        K = stiffness
        C = 2. * (1. / ti.sqrt(1. + (math.pi / ti.log(restitution_coef))**2)
                  ) * ti.sqrt(K * M)
        V = (gf[j].v - gf[i].v) * normal
        f2 = C * V * normal
        gf[i].f += f2 - f1
        gf[j].f -= f2 - f1


list_cur = ti.field(dtype=ti.i32,shape=grid_n * grid_n+1)
grain_count = ti.field(dtype=ti.i32,shape=grid_n * grid_n+1)
particle_id = ti.field(dtype=ti.i32, shape=n, name="particle_id")


@ti.kernel
def calculate_position():
    '''
    Handle the collision between grains.
    '''
    for i in gf:
        gf[i].f = vec(0., gravity * gf[i].m)  # Apply gravity.

    grain_count.fill(0)

    for i in range(n):
        grid_idx = ti.floor(gf[i].p / grid_size, int)
        linear_idx = grid_idx[0] * grid_n + grid_idx[1]
        grain_count[linear_idx+1] += 1

@ti.kernel
def search():            
    list_cur.fill(0)
    for i in range(n):
        grid_idx = ti.floor(gf[i].p / grid_size, int)
        linear_idx = grid_idx[0] * grid_n + grid_idx[1]
        
        grain_location = grain_count[linear_idx] +ti.atomic_add(list_cur[linear_idx], 1)
        particle_id[grain_location] = i

    # Fast collision detection
    for i in range(n):
        grid_idx = ti.floor(gf[i].p / grid_size, int)
        x_begin = ti.max(grid_idx[0] - 1, 0)
        x_end = ti.min(grid_idx[0] + 2, grid_n)

        y_begin = ti.max(grid_idx[1] - 1, 0)
        y_end = ti.min(grid_idx[1] + 2, grid_n)

        for neigh_i in range(x_begin, x_end):
            for neigh_j in range(y_begin, y_end):
                neigh_linear_idx = neigh_i * grid_n + neigh_j
                for p_idx in range(grain_count[neigh_linear_idx],
                                   grain_count[neigh_linear_idx]+list_cur[neigh_linear_idx]):
                    j = particle_id[p_idx]
                    if i < j:
                        resolve(i, j)

init()
gui = ti.GUI('Taichi DEM', (window_size, window_size))
step = 0

if SAVE_FRAMES:
    os.makedirs('output', exist_ok=True)
pse=ti.algorithms.PrefixSumExecutor(int(grid_n*grid_n+1))

import time 

start=time.time()
while step<5e4:
    for s in range(substeps):
        update()
        apply_bc()
        calculate_position()
        pse.run(grain_count)
        search()
    step+=1
    '''pos = gf.p.to_numpy()
    r = gf.r.to_numpy() * window_size
    gui.circles(pos, radius=r)
    if SAVE_FRAMES:
        gui.show(f'output/{step:06d}.png')
    else:
        gui.show()'''
end=time.time()
print(end-start)

已经解决. 如果要反复使用PrefixSumExecutor.run, 需要改写taichi/_kernel.py->blit_from_field_to_field:

@kernel
def blit_from_field_to_field(
        dst: template(), src: template(), offset: i32, size: i32):
    for i in range(size):
        dst[i + offset] = src[i]
    for i in range(size, dst.shape[0]):
        dst[i] = 0
2 个赞