I modified the file diffmpm_simple, putting p2g, grid_op, and g2p together in one kernel substep:
import taichi as ti
import numpy as np
import cv2
import os
import h5py
import matplotlib.pyplot as plt
import torch
real = ti.f32
ti.set_default_fp(real)
ti.get_runtime().set_verbose(True)
bound = 3
dim = 2
n_particles = 256
N = 80
n_grid = 120
dx = 1 / n_grid
inv_dx = 1 / dx
dt = 3e-4
p_mass = 1
p_vol = 1
E = 100
mu = E
la = E
max_steps = 1024
steps = 1024
gravity = 9.8
scalar = lambda: ti.var(dt=real)
vec = lambda: ti.Vector(dim, dt=real)
mat = lambda: ti.Matrix(dim, dim, dt=real)
x = ti.Vector(dim, dt=real, shape=(n_particles), needs_grad=True)
v = ti.Vector(dim, dt=real, shape=(n_particles), needs_grad=True)
grid_v_in = ti.Vector(dim, dt=real, shape=(n_grid, n_grid), needs_grad=True)
grid_v_out = ti.Vector(dim, dt=real, shape=(n_grid, n_grid), needs_grad=True)
grid_m_in = ti.var(dt=real, shape=(n_grid, n_grid), needs_grad=True)
C = ti.Matrix(dim, dim, dt=real, shape=(n_particles), needs_grad=True)
F = ti.Matrix(dim, dim, dt=real, shape=(n_particles), needs_grad=True)
init_v = ti.Vector(dim, dt=real, shape=(), needs_grad=True)
ti.cfg.arch = ti.cuda
@ti.kernel
def random_init():
init_v[None] = [ti.random(ti.float32)*2-1, ti.random(ti.float32)*2-1]
for i in range(n_particles):
v[i] = init_v
@ti.kernel
def substep():
grid_v_in.fill(0)
grid_m_in.fill(0)
for p in range(0, n_particles):
base = ti.cast(x[p] * inv_dx - 0.5, ti.i32)
fx = x[p] * inv_dx - ti.cast(base, ti.i32)
w = [0.5 * ti.sqr(1.5 - fx), 0.75 - ti.sqr(fx - 1),
0.5 * ti.sqr(fx - 0.5)]
new_F = (ti.Matrix.diag(dim=2, val=1) + dt * C[p]) @ F[p]
F[p] = new_F
J = ti.determinant(new_F)
r, s = ti.polar_decompose(new_F)
cauchy = 2 * mu * (new_F - r) @ ti.transposed(new_F) + \
ti.Matrix.diag(2, la * (J - 1) * J)
stress = -(dt * p_vol * 4 * inv_dx * inv_dx) * cauchy
affine = stress + p_mass * C[p]
for i in ti.static(range(3)):
for j in ti.static(range(3)):
offset = ti.Vector([i, j])
dpos = (ti.cast(ti.Vector([i, j]), real) - fx) * dx
weight = w[i](0) * w[j](1)
grid_v_in[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
grid_m_in[base + offset] += weight * p_mass
for p in range(n_grid * n_grid):
i = p // n_grid
j = p - n_grid * i
inv_m = 1 / (grid_m_in[i, j] + 1e-10)
v_out = inv_m * grid_v_in[i, j]
v_out[1] -= dt * gravity
if i < bound and v_out[0] < 0:
v_out[0] = 0
if i > n_grid - bound and v_out[0] > 0:
v_out[0] = 0
if j < bound and v_out[1] < 0:
v_out[1] = 0
if j > n_grid - bound and v_out[1] > 0:
v_out[1] = 0
grid_v_out[i, j] = v_out
for p in range(n_particles):
base = ti.cast(x[p] * inv_dx - 0.5, ti.i32)
fx = x[p] * inv_dx - ti.cast(base, real)
w = [0.5 * ti.sqr(1.5 - fx), 0.75 - ti.sqr(fx - 1.0),
0.5 * ti.sqr(fx - 0.5)]
new_v = ti.Vector([0.0, 0.0])
new_C = ti.Matrix([[0.0, 0.0], [0.0, 0.0]])
for i in ti.static(range(3)):
for j in ti.static(range(3)):
dpos = ti.cast(ti.Vector([i, j]), real) - fx
g_v = grid_v_out[base(0) + i, base(1) + j]
weight = w[i](0) * w[j](1)
new_v += weight * g_v
new_C += 4 * weight * ti.outer_product(g_v, dpos) * inv_dx
v[p] = new_v
x[p] = x[p] + dt * v[p]
C[p] = new_C
for k in range(30):
random_init()
for i in range(N):
for j in range(N):
x[i * N + j] = [dx * (i * 0.5 + 10), dx * (j * 0.5 + 25)]
C.fill(0)
for i in range(n_particles):
F[i] = [[1, 0], [0, 1]]
x_list = [x.to_torch().squeeze(-1)]
for s in range(steps - 1):
substep()
x_list.append(x.to_torch().squeeze(-1).float()) # to tensor is copy so no problem
# visualize
for s in range(63, steps, 64):
scale = 4
img = np.zeros(shape=(scale * n_grid, scale * n_grid)) + 0.3
total = [0, 0]
for i in range(n_particles):
p_x = int(scale * x_list[s][i, 0] / dx)
p_y = int(scale * x_list[s][i, 1] / dx)
img[p_x, p_y] = 1
img = img.swapaxes(0, 1)[::-1]
cv2.imshow('MPM', img)
cv2.waitKey(1)
I got the error saying ti.static can only be used inside Taichi kernels while ti.static is indeed in the kernel:
[Release mode]
[Taichi version 0.3.2, cpu only, commit 26635f82]
Initializing runtime with 48 elements
Runtime initialized.
Traceback (most recent call last):
File "D:/learn_mpm/test.py", line 122, in <module>
substep()
File "C:\Users\49446\AppData\Local\conda\conda\envs\py36\lib\site-packages\taichi\lang\kernel.py", line 308, in __call__
self.materialize(key=key, args=args, arg_features=self.mapper.extract(args))
File "C:\Users\49446\AppData\Local\conda\conda\envs\py36\lib\site-packages\taichi\lang\kernel.py", line 233, in materialize
taichi_kernel = taichi_kernel.define(taichi_ast_generator)
File "C:\Users\49446\AppData\Local\conda\conda\envs\py36\lib\site-packages\taichi\lang\kernel.py", line 230, in taichi_ast_generator
compiled()
File "D:/learn_mpm/test.py", line 68, in substep
for i in ti.static(range(3)):
File "C:\Users\49446\AppData\Local\conda\conda\envs\py36\lib\site-packages\taichi\lang\impl.py", line 252, in static
).inside_kernel, 'ti.static can only be used inside Taichi kernels'
AssertionError: ti.static can only be used inside Taichi kernels
Process finished with exit code 1
Is there any limit while putting such a large amount of codes in one kernel?