# 1.在ti.metal与ti.cpu两种条件下运行相同代码所得的计算结果不同；2.自动并行似乎没有成功

• 问题1： 在ti.metal与ti.cpu两种条件下运行相同代码所得的计算结果不同

``````import time
import taichi as ti

ti.init(arch=ti.metal)

@ti.kernel
def calc_pi() -> ti.f32:
sum = 0.0

for i in range(10**6):
n = 2 * i + 1
sum += pow(-1.0, i) / n
return sum * 4

t0 = time.time()
print('PI =', calc_pi())
t1 = time.time()
print(f'{t1 - t0:.3} sec')
``````

``````[Taichi] Starting on arch=metal
PI = 31.55793571472168
0.487 sec
``````

``````import time
import taichi as ti

ti.init(arch=ti.cpu)

@ti.kernel
def calc_pi() -> ti.f32:
sum = 0.0

for i in range(10**6):
n = 2 * i + 1
sum += pow(-1.0, i) / n
return sum * 4

t0 = time.time()
print('PI =', calc_pi())
t1 = time.time()
print(f'{t1 - t0:.3} sec')
``````

``````[Taichi] Starting on arch=x64
PI = 3.141583204269409
0.0561 sec
``````

• 问题2：自动并行似乎没有成功

``````伪代码：
#全局变量:
dt = 10^-8
Num_t  = 10^4
d_nu  = 0.1
HW  = 6*10^3
pumpcenter  :  ti.f32
num_pumpFW = 6*10^3
N1,  N2,  N3,  N4,  dN1,  dN2,  dN3,  dN4  :  ti.field ( ti.f32,  shape = 120000 )
pumparray  :  ti.field ( ti.f32,  shape = num_pumpFW )
detune_array  :  ti.Vector.field( 4,  ti.f32,  shape = num_pumpFW )
choose_matrix  :  ti.Vector.field( 4,  ti.i32,  shape = 4 )
#主要计算模块使用 Taichi 实现：

@ti.kernel
for  tn  in  Num_t  :	#变量按时间演化，该循环不能使用并行计算，ti.loop_config( serialize=True )

for  i_transi0  in  range(num_pumpFW)  :
transi0 = pumparray[ i_transi0 ]

for  j_choice  in  ti.static( range(4) ):
n1 = N1[ detune_array[ i_transi0 ][ j_choice ] ]           # 类似得到n2, n3, n4
k1, k2, k3, k4  =  k_L4RK( transi0,  pumpcenter,  n1,  n2,  n3,  n4,  j_choice )
dN1[ detune_array[ i_transi0 ][ j_choice ] ]  +=  k1*dt	   # dN2, dN3, dN4类似操作

for  i  in  range(120000)  :	# 一个dt计算结束更新N1, N2, N3, N4
N1[ i ]  +=  dN1[ i ]	    # N2, N3, N4类似操作
dN1[ i ]  =  0	            # dN2, dN3, dN4类似操作
``````

``````@ti.kernel
def Burning():

ti.loop_config(serialize=True)
for tn in ti.ndrange(Num_t):

for i_transi0 in ti.ndrange(num_pumpFW):
transi0 = pumparray_ti[i_transi0]

for j_choice in ti.static(range(4)):
n1 = N1_ti[detune_array_ti[i_transi0][j_choice]]
n2 = N2_ti[detune_array_ti[i_transi0][j_choice]]
n3 = N3_ti[detune_array_ti[i_transi0][j_choice]]
n4 = N4_ti[detune_array_ti[i_transi0][j_choice]]

k_N1, k_N2, k_N3, k_N4 = k_L4RK(transi0, pumpcenter_ti,
n1, n2, n3, n4, j_choice)

dN1_ti[detune_array_ti[i_transi0][j_choice]] += k_N1*dt
dN2_ti[detune_array_ti[i_transi0][j_choice]] += k_N2*dt
dN3_ti[detune_array_ti[i_transi0][j_choice]] += k_N3*dt
dN4_ti[detune_array_ti[i_transi0][j_choice]] += k_N4*dt

for i in ti.ndrange(Inh_len):

N1_ti[i] += dN1_ti[i]
N2_ti[i] += dN2_ti[i]
N3_ti[i] += dN3_ti[i]
N4_ti[i] += dN4_ti[i]

dN1_ti[i] = 0
dN2_ti[i] = 0
dN3_ti[i] = 0
dN4_ti[i] = 0
``````

``````    ti.loop_config(serialize=True)
for tn in ti.ndrange(Num_t):
``````

``````        for i_transi0 in ti.ndrange(num_pumpFW):
``````

• 综上，我的主要疑问是：
1. 尽管（表面上）程序一样，metal和cpu还是在计算上有所差别？（例如精度？即使都是f32的数据类型）因此会出现ti.metal与ti.cpu两种条件下计算结果不一样的情况；
2. 请问我的程序中for循环2未能成功自动并行的原因是什么呢？万分感谢！呜呜呜
• 附录（代码，与Taichi有关代码是@ti.func,@ti.kernel修饰的函数部分，以及所设置的全局变量ti.field类型的数据等）
``````
import taichi as ti
import numpy as np

import matplotlib.pyplot as plt
import scipy.constants as const
from scipy.special import wofz
from sympy.physics.quantum import TensorProduct
from numba import jit
import time
from scipy.linalg import eigh
import pandas as pd

ti.init(arch=ti.cpu)

def Voiigt(x, alpha, gamma):
sigma = alpha / np.sqrt(2 * np.log(2))

return np.real(wofz((x + 1j*gamma)/sigma/np.sqrt(2))) / sigma\
/np.sqrt(2*np.pi)

def J_z( eigenvector ):

J = eigenvector[0]
m = eigenvector[1]
eigenvalue = m
return eigenvalue, (J,m)

def J_plus( eigenvector ):

J = eigenvector[0]
m = eigenvector[1]
pseudo_eigenvalue = np.sqrt( (J-m)*(J+m+1) )
return pseudo_eigenvalue, (J,m+1)

def J_minus( eigenvector ):

J = eigenvector[0]
m = eigenvector[1]
pseudo_eigenvalue = np.sqrt( (J+m)*(J-m+1) )
return pseudo_eigenvalue, (J,m-1)

def kronecker_bra_ket( eigenvector1, eigenvector2 ):

if eigenvector1 == eigenvector2:
return 1
else:
return 0

def spin( azimuthal_quantum_number ):

aqm = azimuthal_quantum_number
# 1. create a basis
basis = []
order = np.linspace( +aqm, -aqm, round(2*aqm+1), endpoint =True )
for i in order:
basis.append( ( aqm, i ) )
# 2. create matrix of operator
J_z_matrix = np.zeros([round(2*aqm+1), round(2*aqm+1)], dtype = float)
J_plus_matrix = np.zeros([round(2*aqm+1), round(2*aqm+1)], dtype = float)
J_minus_matrix = np.zeros([round(2*aqm+1), round(2*aqm+1)], dtype = float)
for row_num in range(round(2*aqm+1)):
for column_num in range(round(2*aqm+1)):
bra = basis[row_num]

ket = J_z( basis[column_num] )[1]
eigenvalue = J_z( basis[column_num] )[0]
J_z_matrix[row_num, column_num] = kronecker_bra_ket( bra, ket)\
*eigenvalue
ket = J_plus( basis[column_num] )[1]
eigenvalue = J_plus( basis[column_num] )[0]
J_plus_matrix[row_num, column_num] = kronecker_bra_ket( bra, ket)\
*eigenvalue
ket = J_minus( basis[column_num] )[1]
eigenvalue = J_minus( basis[column_num] )[0]
J_minus_matrix[row_num, column_num] = kronecker_bra_ket( bra, ket)\
*eigenvalue
J_x_matrix = 0.5*( J_plus_matrix + J_minus_matrix )
J_y_matrix = -0.5*1j*( J_plus_matrix - J_minus_matrix )
J = np.array([[J_x_matrix, J_y_matrix, J_z_matrix]])
return J

def matrix_multiply( AB, BC ):
'''
numpy官网的各种矩阵操作函数太麻烦了，自己造一个
矩阵AB[A:row,B:colume]（type=ndarry）*矩阵BC[B:row,C:colume]
（type=ndarry）的矩阵元素乘法器以便于嵌套矩阵作
为元素的矩阵乘法.Return AC

*e.g.  g_site1_sub1_ground*Pauli_matrix
'''
A = AB.shape[0]
B = AB.shape[1]  #B = BC.shape[0]
C = BC.shape[1]

AC_lst = []
if ( AB[0,0].shape == () ) and ( BC[0,0].shape == () ):
'''
element of AB is number, element of BC is also number
'''
for i in range(A):
for j in range(C):
AC_ij = 0
for k in range(B):
AC_ij = AC_ij + AB[i,k]*BC[k,j]
AC_lst.append(AC_ij)
return np.array(AC_lst).reshape( A, C )
else:
'''
element of AB or BC is number, the other is matrix
or
element of AB is matrix, element of BC is also matrix
'''
for i in range(A):
for j in range(C):
AC_ij = 0
for k in range(B):
AC_ij = AC_ij + np.dot( AB[i,k],BC[k,j] )
AC_lst.append(AC_ij)
ele_shape = AC_ij.shape
return np.array(AC_lst).reshape( A, C, ele_shape[0], ele_shape[1] )

def Trans( nm ):
'''
适用于以矩阵为元素的矩阵的转置:

[ ]n*m--->[ ]m*n, A_ij--->A_ji for square matrix

*elements of the matrix is also matrix
'''
n = nm.shape[0]
m = nm.shape[1]
mn_lst = []
for col_n in range(m):
for row_n in range(n):
mn_lst.append( nm[row_n][col_n] )
ele_shape = nm[row_n][col_n].shape
return np.array(mn_lst).reshape( m, n, ele_shape[0], ele_shape[1] )

def B( phi, theta, am_B ):

B_x = am_B*np.sin(theta)*np.cos(phi)
B_y = am_B*np.sin(theta)*np.sin(phi)
B_z = am_B*np.cos(theta)
B = np.array( [[B_x, B_y, B_z]] )
return B

def CenterFreq_non167(phi, theta, am_B, flag1, flag2):

B_vec = B( phi, theta, am_B)
S = spin(1/2)

# some constants
mu_B = const.e*const.hbar/(2*const.m_e)

if flag2 == 'Site 1':
if flag1 == 'II':

g_tensor_e = np.array([[1.63, -1.86, -2.98],
[-1.86, 3.66, 5.11],
[-2.98, 5.11, 8.29]])

g_tensor_g = np.array([[2.90, -2.95, -3.56],
[-2.95, 8.90, 5.57],
[-3.56, 5.57, 5.12]])
elif flag1 == 'I':

g_tensor_e = np.array([[1.63, -1.86, 2.98],
[-1.86, 3.66, -5.11],
[2.98, -5.11, 8.29]])

g_tensor_g = np.array([[2.90, -2.95, 3.56],
[-2.95, 8.90, -5.57],
[3.56, -5.57, 5.12]])
freq_original = const.c/(1536.4*10**-9*10**6)
elif flag2 == 'Site 2':
if flag1 == 'I':

g_tensor_e = np.array([[11.58, -0.69, 4.44],
[-0.69, 0.31, -0.26],
[4.44, -0.26, 2.13]])

g_tensor_g = np.array([[14.37, -1.77, 2.40],
[-1.77, 1.93, -0.43],
[2.40, -0.43, 1.44]])
elif flag1 == 'II':

g_tensor_e = np.array([[11.58, -0.69, -4.44],
[-0.69, 0.31, 0.26],
[-4.44, 0.26, 2.13]])

g_tensor_g = np.array([[14.37, -1.77, -2.40],
[-1.77, 1.93, 0.43],
[-2.40, 0.43, 1.44]])
freq_original = const.c/(1538.9*10**-9*10**6)
electron_Zeeman_g = mu_B*matrix_multiply( B_vec,
matrix_multiply( g_tensor_g,
Trans(S)
)
)
electron_Zeeman_e = mu_B*matrix_multiply( B_vec,
matrix_multiply( g_tensor_e,
Trans(S)
)
)
H_g = electron_Zeeman_g[0][0]
H_e = electron_Zeeman_e[0][0]
eigen_g = eigh( H_g, eigvals_only=False )
eigen_e = eigh( H_e, eigvals_only=False )
eigen_value_g = eigen_g[0]
eigen_value_e = eigen_e[0]
#eigen_vector_g = eigen_g[1]
eigen_value_MHz_g = eigen_value_g/(const.h*10**6)
eigen_value_MHz_e = eigen_value_e/(const.h*10**6)
MHz_low_g = eigen_value_MHz_g[0]
MHz_up_g = eigen_value_MHz_g[1]
MHz_low_e = eigen_value_MHz_e[0]
MHz_up_e = eigen_value_MHz_e[1]
low_to_low = freq_original - MHz_low_g + MHz_low_e
low_to_up = freq_original - MHz_low_g + MHz_up_e
up_to_low = freq_original - MHz_up_g + MHz_low_e
up_to_up = freq_original - MHz_up_g + MHz_up_e
return low_to_low, low_to_up, up_to_low, up_to_up

def df_1d_to_1darray( df_1d ):
"""
将[一维]的dataframe转化为一维的dtype=float64的ndarray
"""
ary_1d = np.array( [] )
dfary_1d = df_1d.values
for i in range( len(dfary_1d) ):
try:
#适用于大小为(n,1)的一维数据
ary_1d = np.append( ary_1d, dfary_1d[i][0] )
except IndexError:
#适用于大小为(n,)的一维数据
ary_1d = np.append( ary_1d, dfary_1d[i] )
return ary_1d

@ti.func
def Lorentzian(x: ti.f32, gamma: ti.f32) -> ti.f32:
'''
gamma: HWHM
'''
return gamma/(pi_ti*(x**2+gamma**2))

@ti.func
def Gaussian(x: ti.f32, sigma: ti.f32) -> ti.f32:
'''
sigma: HWHM/sqrt(2ln2)
'''
return e_ti**(-x**2/(2*sigma**2))/(sigma*(2*pi_ti)**0.5)

@ti.func
def B31_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B31*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)

@ti.func
def B32_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B32*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)

@ti.func
def B41_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B41*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)

@ti.func
def B42_prime(nu0: ti.f32, nu_p: ti.f32) -> ti.f32:
'''
注意频率以 MHz 为单位
'''
return B42*u_ti*Lorentzian(nu0 - nu_p, pump_HWHM_ti)

@ti.func
def derivative_N1(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (-1*(B31_prime(nu0,nu_p)*popu_matrix[0] +
B41_prime(nu0,nu_p)*popu_matrix[1] + w12)*N1_t +
w21*N2_t +
(A31 + B31_prime(nu0,nu_p)*popu_matrix[0])*N3_t +
(A41 + B41_prime(nu0,nu_p)*popu_matrix[1])*N4_t
)

@ti.func
def derivative_N2(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (w12*N1_t -
(B32_prime(nu0,nu_p)*popu_matrix[2] +
B42_prime(nu0,nu_p)*popu_matrix[3] + w21)*N2_t +
(A32 + B32_prime(nu0,nu_p)*popu_matrix[2])*N3_t +
(A42 + B42_prime(nu0,nu_p)*popu_matrix[3])*N4_t
)

@ti.func
def derivative_N3(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (B31_prime(nu0,nu_p)*popu_matrix[0]*N1_t +
B32_prime(nu0,nu_p)*popu_matrix[2]*N2_t -
(A31 + B31_prime(nu0,nu_p)*popu_matrix[0] +
A32 + B32_prime(nu0,nu_p)*popu_matrix[2] +
w34)*N3_t +
w43*N4_t
)

@ti.func
def derivative_N4(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
popu_matrix: ti.template()
) -> ti.f32:
return (B41_prime(nu0,nu_p)*popu_matrix[1]*N1_t +
B42_prime(nu0,nu_p)*popu_matrix[3]*N2_t +
w34*N3_t -
(A41 + B41_prime(nu0,nu_p)*popu_matrix[1] +
A42 + B42_prime(nu0,nu_p)*popu_matrix[3] +
w43)*N4_t
)

@ti.func
def k_L4RK(nu0: ti.f32, nu_p: ti.f32,
N1_t: ti.f32, N2_t: ti.f32, N3_t: ti.f32, N4_t: ti.f32,
choice: ti.i32) -> (ti.f32, ti.f32, ti.f32, ti.f32):
'''
四阶龙格库塔
'''
k1_N1 = derivative_N1(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k1_N2 = derivative_N2(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k1_N3 = derivative_N3(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])
k1_N4 = derivative_N4(nu0, nu_p,
N1_t,
N2_t,
N3_t,
N4_t,
choose_matrix_ti[choice])

k2_N1 = derivative_N1(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k2_N2 = derivative_N2(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k2_N3 = derivative_N3(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])
k2_N4 = derivative_N4(nu0, nu_p,
N1_t+k1_N1*0.5*dt,
N2_t+k1_N2*0.5*dt,
N3_t+k1_N3*0.5*dt,
N4_t+k1_N4*0.5*dt,
choose_matrix_ti[choice])

k3_N1 = derivative_N1(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N2 = derivative_N2(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N3 = derivative_N3(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])
k3_N4 = derivative_N4(nu0, nu_p,
N1_t+k2_N1*0.5*dt,
N2_t+k2_N2*0.5*dt,
N3_t+k2_N3*0.5*dt,
N4_t+k2_N4*0.5*dt,
choose_matrix_ti[choice])

k4_N1 = derivative_N1(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k4_N2 = derivative_N2(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k4_N3 = derivative_N3(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])
k4_N4 = derivative_N4(nu0, nu_p,
N1_t+k3_N1*dt,
N2_t+k3_N2*dt,
N3_t+k3_N3*dt,
N4_t+k3_N4*dt,
choose_matrix_ti[choice])

k_N1 = (k1_N1+2*k2_N1+2*k3_N1+k4_N1)/6
k_N2 = (k1_N2+2*k2_N2+2*k3_N2+k4_N2)/6
k_N3 = (k1_N3+2*k2_N3+2*k3_N3+k4_N3)/6
k_N4 = (k1_N4+2*k2_N4+2*k3_N4+k4_N4)/6

return k_N1, k_N2, k_N3, k_N4

@ti.kernel
def Burning():

ti.loop_config(serialize=True)
for tn in ti.ndrange(Num_t):

for i_transi0 in ti.ndrange(num_pumpFW):
transi0 = pumparray_ti[i_transi0]

for j_choice in ti.static(range(4)):
n1 = N1_ti[detune_array_ti[i_transi0][j_choice]]
n2 = N2_ti[detune_array_ti[i_transi0][j_choice]]
n3 = N3_ti[detune_array_ti[i_transi0][j_choice]]
n4 = N4_ti[detune_array_ti[i_transi0][j_choice]]

k_N1, k_N2, k_N3, k_N4 = k_L4RK(transi0, pumpcenter_ti,
n1, n2, n3, n4, j_choice)

dN1_ti[detune_array_ti[i_transi0][j_choice]] += k_N1*dt
dN2_ti[detune_array_ti[i_transi0][j_choice]] += k_N2*dt
dN3_ti[detune_array_ti[i_transi0][j_choice]] += k_N3*dt
dN4_ti[detune_array_ti[i_transi0][j_choice]] += k_N4*dt

for i in ti.ndrange(Inh_len):

N1_ti[i] += dN1_ti[i]
N2_ti[i] += dN2_ti[i]
N3_ti[i] += dN3_ti[i]
N4_ti[i] += dN4_ti[i]

dN1_ti[i] = 0
dN2_ti[i] = 0
dN3_ti[i] = 0
dN4_ti[i] = 0

if __name__ == "__main__":
#----------------------------------时间步长------------------------------
dt = 10**-8    # 峰值处连续条件（不出现nan值）max:5*10**-8 s
Num_t = 10**4
#-------------------------一些flag以及外部文件信息-----------------------
Site_flag = 'Site 1'
Subclass_flag = 'I'
B_amplitude = 0.012    #Tesla
B_phi = np.pi*3/4
B_theta = np.pi/2
T = 2.1    # Kelvin
#----------------------------系统中的物理量-----------------------------
Time_begin = time.perf_counter()
#生成跃迁线部分, >_<
nu_13,nu_14,nu_23,nu_24 = CenterFreq_non167( B_phi, B_theta, B_amplitude
, Subclass_flag, Site_flag)
Time_end1 = time.perf_counter()
print('\n\t[1] ****Construction of Transitions of Er**** finished'+
' (None):\n\t\t'
+str(Time_end1 - Time_end1)[:6]+'s')
T1 = 11*10**-3     #T1 = 11 ms
Tz = 129*10**-3    #Tz = 129 ms
A3 = 1/T1
beta = 0.9
#---float32--->>>
A31 = np.float32(beta*A3)
A32 = np.float32((1-beta)*A3)
A42 = np.float32(beta*A3)
A41 = np.float32((1-beta)*A3)
B31 = np.float32(const.c**3/(8*np.pi*const.h*(nu_13*10**6)**3)*A31)
B32 = np.float32(const.c**3/(8*np.pi*const.h*(nu_23*10**6)**3)*A32)
B41 = np.float32(const.c**3/(8*np.pi*const.h*(nu_14*10**6)**3)*A41)
B42 = np.float32(const.c**3/(8*np.pi*const.h*(nu_24*10**6)**3)*A42)
w12 = np.float32(0.5*1/Tz)
w21 = np.float32(0.5*1/Tz)
w34 = np.float32(0.5*1/Tz)
w43 = np.float32(0.5*1/Tz)
#---float32---<<<
#泵浦激光部分
pump_HWHM = 0.05    # HWHM in MHz
n = 1.8
P = 1*10**-9    #泵浦激光的（有效）功率 W
r = 20*10**-6    #通过晶体时的光束半径 m
S = np.pi*r**2
I = P/S
E0 = np.sqrt(2*I/(const.epsilon_0*const.c*n))
u = I/const.c*n
#------------------------------------------------------------------------
#-------------------------并生成{N_ti}和{dN_ti}--------------------------
HW = 6*10**3
d_nu = 10**-1
Inh_len = int(2*HW/d_nu)
N_Voigt = np.zeros( (Inh_len,) )    #2*HW的numpy数组

for i in range(Inh_len):
ni = Voiigt( (i-Inh_len/2-0.5)*d_nu, 100, 100)
N_Voigt[i] = ni
Population_ratio = np.array([1,
np.e**(-const.h*(nu_13-nu_23)*10**6/(const.k*T)),
np.e**(-const.h*nu_13*10**6/(const.k*T)),
np.e**(-const.h*nu_14*10**6/(const.k*T))])
Population_ratio = Population_ratio/Population_ratio.sum()

N1 = N_Voigt.copy()*Population_ratio[0]
N2 = N_Voigt.copy()*Population_ratio[1]
N3 = N_Voigt.copy()*Population_ratio[2]
N4 = N_Voigt.copy()*Population_ratio[3]

N1_ti = ti.field(ti.f32, shape = Inh_len)
N2_ti = ti.field(ti.f32, shape = Inh_len)
N3_ti = ti.field(ti.f32, shape = Inh_len)
N4_ti = ti.field(ti.f32, shape = Inh_len)

N1_ti.from_numpy(N1.astype(np.float32))
del N1
N2_ti.from_numpy(N2.astype(np.float32))
del N2
N3_ti.from_numpy(N3.astype(np.float32))
del N3
N4_ti.from_numpy(N4.astype(np.float32))
del N4

dN1_ti = ti.field(ti.f32, shape=Inh_len)
dN2_ti = ti.field(ti.f32, shape=Inh_len)
dN3_ti = ti.field(ti.f32, shape=Inh_len)
dN4_ti = ti.field(ti.f32, shape=Inh_len)

Time_end2 = time.perf_counter()
print('\n\t[2] ****Population Initialization**** finished (None)'+
': \n\t\t'
+str(Time_end2 - Time_end1)[:6]+'s')
#----------------------将常数转换为Taichi用的32位浮点数型---------------
pi_ti = np.float32(const.pi)
e_ti = np.float32(const.e)
pump_HWHM_ti = np.float32(pump_HWHM)
u_ti = np.float32(u)
#---频率值均作了相同大值截断---
nu_13_ti = np.float32(nu_13 - 1.951*10**8)
nu_14_ti = np.float32(nu_14 - 1.951*10**8)
nu_23_ti = np.float32(nu_23 - 1.951*10**8)
nu_24_ti = np.float32(nu_24 - 1.951*10**8)
HW_ti = np.float32(HW)
d_nu_ti = np.float32(d_nu)
#------------------------生成pumpuarray-----------------
#-----------------------生成detune_array------------------
#-----------由于使用的是float32，所以进行大值截断- 1.951*10**8----------
num_pumpHW = 6000
num_pumpFW = int(2*num_pumpHW*pump_HWHM/d_nu)
Series_pumpcenter = np.array([nu_13 - 1.951*10**8])
pumparray = np.array([])
for pumpcenter in Series_pumpcenter:
pumparray = np.append(pumparray,
np.linspace(pumpcenter-num_pumpHW*pump_HWHM,
pumpcenter+num_pumpHW*pump_HWHM,
num_pumpFW,
endpoint=False)
)
pumparray_ti = ti.field(ti.f32, num_pumpFW)
pumparray_ti.from_numpy(pumparray.astype(np.float32))
pumpcenter_ti = nu_13_ti
detune_N1_13_f64 = np.around((pumparray - nu_13_ti + HW_ti)/d_nu_ti)
detune_N1_14_f64 = np.around((pumparray - nu_14_ti + HW_ti)/d_nu_ti)
detune_N2_23_f64 = np.around((pumparray - nu_23_ti + HW_ti)/d_nu_ti)
detune_N2_24_f64 = np.around((pumparray - nu_24_ti + HW_ti)/d_nu_ti)
detune_array_ti = ti.Vector.field(4, ti.i32, shape = num_pumpFW)
detune_array = np.array( [detune_N1_13_f64,
detune_N1_14_f64,
detune_N2_23_f64,
detune_N2_24_f64] )
detune_array_T = np.transpose(detune_array)
detune_array_ti.from_numpy(detune_array_T.astype(np.int32))
#--------------------------构建choose_matrix----------------------------
choose_matrix_ti = ti.Vector.field(4, ti.i32, 4)
choose_matrix = np.array([[1,0,0,0],
[0,1,0,0],
[0,0,1,0],
[0,0,0,1]], dtype=np.int32)
choose_matrix_ti.from_numpy(choose_matrix)
Time_end3 = time.perf_counter()
print('\n\t[3] ****Burning-pumparray Initialization**** finished (@Taichi)'+
': \n\t\t'
+str(Time_end3 - Time_end2)[:6]+'s')
#-----------------------------计算模块---------------------------------
Burning()
Time_end4 = time.perf_counter()
print('\n\t[4] ****Population Computation**** finished (@Taichi): \n\t\t'
+str(Time_end4 - Time_end3)[:6]+'s')

plt.plot(np.linspace(0, N1_ti.shape[0], N1_ti.shape[0],
endpoint=False), N1_ti.to_numpy())
'''
plt.plot(np.linspace(0, N1_ti.shape[0], N1_ti.shape[0],
endpoint=False),
N1_ti.to_numpy()-N_Voigt.copy()*Population_ratio[0])
'''

``````

Hi @qqqhhh. 你提出的都是非常好的问题。

1 Like

``````@ti.kernel
def try():
ti.loop_config(serialize=True)
for i in range(10):

for j in range(10):
...

``````
3 Likes