# 请问如何定义可变维度的数据结构？

``````from itertools import product
import fitz
import os,cv2
import shutil
import multiprocessing
import taichi as ti
ti.init(arch=ti.cuda)
pix = ti.field(3,dtype=ti.i32,shape=())

def open_pdf(pathfile):
pdf_file = fitz.open(pathfile)
zoom = 2
mat = fitz.Matrix(zoom, zoom)
page_no = 0

for page in pdf_file:
remove_pdfwatermark(page,mat,page_no)
page_no += 1

@ti.kernel
def remove_pdfwatermark(page,mat,page_no):
pix = page.get_pixmap(matrix=mat)
new_pix = ti.vector.field(pix.shape[2], dtype=ti.i32, shape=(pix.shape[0], pix.shape[1]))
new_pix.from_numpy(pix)
for pos in product(range(pix.width), range(pix.height)):
if sum(new_pix.pixel(pos[0], pos[1])) >= 710:
new_pix.set_pixel(pos[0], pos[1], (255, 255, 255))
row = int(1330 / 1361 * len(new_pix))
col1 = int(220 / 960 * len(new_pix[0]))
col2 = int(720 / 960 * len(new_pix[0]))
new_pix[row:len(new_pix), col1:col2] = (255, 255, 255)
cv2.imwrite(f"./png/{page_no}.png", new_pix)

new_pix = new_pix.to_numpy()
new_pix.pil_save(f"./png/{page_no}.png", dpi=(300, 300))
print(f'第 {page_no} 页去除完成')

def mkpdf(filename):
# 图片所在的文件夹

pic_dir = 'png'

pdf = fitz.open()
# 图片数字文件先转换成int类型进行排序
img_files = sorted(os.listdir(pic_dir), key=lambda x: int(str(x).split('.')[0]))
for img in img_files:
imgdoc = fitz.open(pic_dir + '/' + img)
# 将打开后的图片转成单页pdf
pdfbytes = imgdoc.convert_to_pdf()
imgpdf = fitz.open("pdf", pdfbytes)
# 将单页pdf插入到新的pdf文档中
pdf.insert_pdf(imgpdf)
pdf.save(filename + ".pdf")
print(filename)
pdf.close()

def run(dirname):

for root,dirs,files in os.walk(dirname):
print("files",files)

for file in files:
pathfile = os.path.join(root,file)
print(pathfile)
open_pdf(pathfile)

mkpdf(pathfile)
shutil.rmtree("./png")
os.mkdir("./png")

if __name__ == '__main__':
dirname = "D:\\小学"
pool = multiprocessing.Pool()
pool.map(run, (dirname,))
pool.close()
``````

Hi @lystm ， 非常欢迎来到Taichi论坛。

1. `remove_pdfwatermark`函数 是 kernel函数，这样的函数的参数需要加 type hints。可以参考文档
2. 在kernel中的new_pix需要定义在外部。
3. `from_numpy` 也需要在kernel外部使用。
1 Like