header("6. RAW CUDA KERNEL — MANDELBROT")

# Escape-time Mandelbrot kernel, one thread per pixel on a 2D launch.
# Parameters:
#   xmin, xmax, ymin, ymax : bounds of the region of the complex plane to render
#   W, H                   : image width and height in pixels
#   max_iter               : iteration cap per pixel
#   out                    : W*H int buffer, written at index iy*W + ix
# Each thread writes the number of iterations performed before |z|^2 reached 4
# (or max_iter if it never did).
mandel = cp.RawKernel(r'''
extern "C" __global__
void mandel(float xmin, float xmax, float ymin, float ymax,
            int W, int H, int max_iter, int* out) {
    int ix = blockDim.x * blockIdx.x + threadIdx.x;
    int iy = blockDim.y * blockIdx.y + threadIdx.y;
    // The grid is rounded up to whole blocks, so threads past the image edge exit.
    if (ix >= W || iy >= H) return;

    // Map the pixel to a point c = cx + i*cy; pixel 0 lands on the min bound
    // and pixel W-1 / H-1 on the max bound.
    float cx = xmin + (xmax - xmin) * ix / (W - 1);
    float cy = ymin + (ymax - ymin) * iy / (H - 1);

    // Iterate z <- z^2 + c from z = 0 until |z|^2 >= 4 or the cap is hit.
    float zx = 0.f, zy = 0.f;
    int it = 0;
    while (zx*zx + zy*zy < 4.f && it < max_iter) {
        float t = zx*zx - zy*zy + cx;   // real part of z^2 + c
        zy = 2.f*zx*zy + cy;            // imaginary part (uses the old zx)
        zx = t;
        ++it;
    }
    out[iy*W + ix] = it;
}
''', 'mandel')

W, H, ITER = 1024, 1024, 400
img = cp.zeros((H, W), dtype=cp.int32)

# 2D launch configuration: threads per block, and enough blocks (ceil-div)
# to cover the whole W x H image.
hilos = (16, 16)
bloques = ((W + hilos[0] - 1) // hilos[0], (H + hilos[1] - 1) // hilos[1])

# RawKernel arguments are not type-checked. Scalars are wrapped in explicit
# 32-bit types so they match the kernel's float/int parameters; a bare Python
# int would be marshalled as a 64-bit value.
mandel(bloques, hilos,
       (cp.float32(-2.0), cp.float32(1.0), cp.float32(-1.5), cp.float32(1.5),
        cp.int32(W), cp.int32(H), cp.int32(ITER), img))
cp.cuda.Stream.null.synchronize()
print(f"Mandelbrot hecho. iter máximo alcanzado={int(img.max())}")

# log1p compresses the iteration counts for display; the array is copied to
# the host because matplotlib needs a NumPy array.
plt.figure(figsize=(6,6))
plt.imshow(cp.asnumpy(cp.log1p(img)), cmap='twilight_shifted',
           extent=[-2,1,-1.5,1.5])
plt.title("Conjunto de Mandelbrot - calculado con un CuPy RawKernel")
plt.axis('off'); plt.show()

header("7. CUDA STREAMS")

# Two non-blocking streams: work issued inside each `with` block is enqueued
# on that stream rather than on the default (null) stream.
# NOTE(review): whether the two matmuls actually overlap depends on the device.
s1, s2 = cp.cuda.Stream(non_blocking=True), cp.cuda.Stream(non_blocking=True)
with s1:
    a1 = cp.random.rand(2000, 2000, dtype=cp.float32)
    b1 = cp.random.rand(2000, 2000, dtype=cp.float32)
    c1 = a1 @ b1
with s2:
    a2 = cp.random.rand(2000, 2000, dtype=cp.float32)
    b2 = cp.random.rand(2000, 2000, dtype=cp.float32)
    c2 = a2 @ b2

# Wait for both streams before the results are reduced outside the stream contexts.
s1.synchronize(); s2.synchronize()
print(f"Stream-1 media={float(c1.mean()):.4f}")
print(f"Stream-2 media={float(c2.mean()):.4f}")