请问为啥这个代码在GPU运行于CPU运行时结果不同😭

from __utility import *
import taichi as ti
ti.init(arch=ti.gpu)
#N=8
x = np.array([-1, 1])  # 定义 (-1, 1) 向量
#x = np.arange(-1, 2, 2)
Result = np.array(np.meshgrid(*([x]*N))).T.reshape(-1, N)
result = ti.field(ti.f32, shape=(2**N, N))
result.from_numpy(Result)
#print(result)
theta=np.linspace(0,np.pi/2,90)#  %定义角的范围(0-pi)
phi=np.linspace(0,2*np.pi,360)#   %定义j角的范围(0-2pi)
tt, pp=np.meshgrid(theta,phi)# %把空间分为空间角的单元
Stheta = ti.field(ti.f32, shape=(360, 90))
Sphi = ti.field(ti.f32, shape=(360, 90))
Location_x = ti.field(ti.f32, shape=(N, N))
Location_y = ti.field(ti.f32, shape=(N, N))
sA = ti.field(ti.f32, shape=(N))
F = ti.field(ti.f32, shape=(360, 90))
ff = ti.Vector.field(2, ti.f32, shape=(360, 90))
print(sA)
print(type(sA))
Stheta.from_numpy(tt)
Sphi.from_numpy(pp)
x0, y0 = np.meshgrid(np.arange(-(N/2-0.5),(N/2-0.5+1),1), np.arange((N/2-0.5),-(N/2-0.5+1),-1))
Location_x.from_numpy(x0)
Location_y.from_numpy(y0)

@ti.kernel
def far_field(q:int, Ph0:ti.f64) -> ti.f64:
    k, D = 2*ti.math.pi, 2.5
    A = ti.Vector([0.0]*N)
    for l1, l2 in ff:
        ff[l1, l2][0] = 0
        ff[l1, l2][1] = 0
    F.fill(0)
    for i in range(N):
        A[i] = result[q, i]
    Phase = ((A.outer_product(A)+1)/2 * Ph0)
    for mm, nn, j1, j2 in ti.ndrange(N, N, 360, 90):
        ff[j1, j2] += ti.math.cexp(ti.math.vec2(0, -(Phase[mm, nn]+k*D*ti.math.sin(Stheta[j1, j2])*((Location_x[mm, nn])\
            *ti.math.cos(Sphi[j1, j2]) + (Location_y[mm, nn]) * ti.math.sin(Sphi[j1, j2])))))
    kk = 0.0
    ti.loop_config(serialize=True)
    for j1, j2 in F:
        F[j1, j2] = (ti.abs(ti.math.cos(0.067*180/ti.math.pi*Stheta[j1, j2]))*\
            ti.math.exp(-(180/ti.math.pi*Stheta[j1, j2]/31.14) ** 2) * ff[j1, j2]).norm()
        kk = ti.max(kk, F[j1, j2])
    return kk

q=0
Ph0=180/180*np.pi
tt=np.linspace(0,np.pi/2,90)#  %定义角的范围(0-pi)
pp=np.linspace(0,2*np.pi,360)#   %定义j角的范围(0-2pi)
tt, pp=np.meshgrid(tt,pp)
fm = np.zeros(2 ** N)
for i in range(2 ** N):
    fm[i] = far_field(i, Ph0)
    #print(F)
    #print(far_field(i, Ph0))  
xx = np.argmin(fm)
ss = far_field(int(xx), Ph0)
fms = fm[xx]
print(fms)
r = F.to_numpy()
r[r<0] = 0
np.max(r)
x, y, z = sph2cart(r, tt, pp)

fig = pl.figure()
ax = fig.add_subplot(projection="3d")
ax.plot_surface(x, y, z, cmap = "inferno")
pl.show()

并且在gpu上重复运行时的结果也不一致。

解决啦,是因为struct for不能串行。。。改成两层for就ok了

1 个赞