From fdf7e70bfd86b6b930a6a8d1c73701ef5a725231 Mon Sep 17 00:00:00 2001 From: Jennifer Taylor Date: Sun, 16 May 2021 00:20:43 +0000 Subject: [PATCH] Speed up rendering using multi-processing. The bottleneck is now the queue and we need a C++ implementation. --- bemani/format/afp/blend.py | 153 +++++++++++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 31 deletions(-) diff --git a/bemani/format/afp/blend.py b/bemani/format/afp/blend.py index 02f26db..89ee610 100644 --- a/bemani/format/afp/blend.py +++ b/bemani/format/afp/blend.py @@ -1,3 +1,4 @@ +import multiprocessing from PIL import Image # type: ignore from typing import List, Tuple @@ -169,6 +170,7 @@ def affine_composite( # Get the data in an easier to manipulate and faster to update fashion. imgmap = list(img.getdata()) texmap = list(texture.getdata()) + cores = multiprocessing.cpu_count() # Warn if we have an unsupported blend. if blendfunc not in {0, 2, 3, 8, 9, 70}: @@ -193,44 +195,133 @@ def affine_composite( miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0) maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight) - for imgy in range(miny, maxy): - for imgx in range(minx, maxx): - # Determine offset - imgoff = imgx + (imgy * imgwidth) + if cores < 2: + # We don't have enough CPU cores to bother multiprocessing. + for imgy in range(miny, maxy): + for imgx in range(minx, maxx): + # Determine offset + imgoff = imgx + (imgy * imgwidth) - # Blit this pixel. - imgmap[imgoff] = affine_blend_point(imgx, imgy, imgwidth, imgheight, add_color, mult_color, imgmap[imgoff], inverse, origin, blendfunc, texwidth, texheight, texmap) + # Calculate what texture pixel data goes here. + texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin) + texx, texy = texloc.as_tuple() + + # If we're out of bounds, don't update. + if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: + continue + + # Blend it. + texoff = texx + (texy * texwidth) + imgmap[imgoff] = affine_blend_impl(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc) + else: + # Let's spread the load across multiple processors. + procs: List[multiprocessing.Process] = [] + work: multiprocessing.Queue = multiprocessing.Queue() + results: multiprocessing.Queue = multiprocessing.Queue() + expected: int = 0 + + for _ in range(cores): + proc = multiprocessing.Process( + target=pixel_renderer, + args=( + work, + results, + minx, + maxx, + imgwidth, + texwidth, + texheight, + inverse, + origin, + add_color, + mult_color, + blendfunc, + imgmap, + texmap, + ), + ) + procs.append(proc) + proc.start() + + for imgy in range(miny, maxy): + work.put(imgy) + expected += 1 + + lines: List[List[Tuple[int, int, int, int]]] = [ + imgmap[x:(x + imgwidth)] + for x in range( + 0, + imgwidth * imgheight, + imgwidth, + ) + ] + for _ in range(expected): + imgy, result = results.get() + lines[imgy] = result + imgmap = [pixel for line in lines for pixel in line] + + for proc in procs: + work.put(None) + for proc in procs: + proc.join() return imgmap -def affine_blend_point( - imgx: int, - imgy: int, + +def pixel_renderer( + work: multiprocessing.Queue, + results: multiprocessing.Queue, + minx: int, + maxx: int, imgwidth: int, - imgheight: int, - add_color: Tuple[int, int, int, int], - mult_color: Color, - dest_color: Tuple[int, int, int, int], - inverse: Matrix, - origin: Point, - blendfunc: int, texwidth: int, texheight: int, + inverse: Matrix, + origin: Point, + add_color: Tuple[int, int, int, int], + mult_color: Color, + blendfunc: int, + imgmap: List[Tuple[int, int, int, int]], texmap: List[Tuple[int, int, int, int]], +) -> None: + while True: + imgy = work.get() + if imgy is None: + return + + result: List[Tuple[int, int, int, int]] = [] + for imgx in range(imgwidth): + # Determine offset + imgoff = imgx + (imgy * imgwidth) + if imgx < minx or imgx >= maxx: + result.append(imgmap[imgoff]) + continue + + # Calculate what texture pixel data goes here. + texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin) + texx, texy = texloc.as_tuple() + + # If we're out of bounds, don't update. + if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: + result.append(imgmap[imgoff]) + continue + + # Blend it. + texoff = texx + (texy * texwidth) + result.append(affine_blend_impl(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc)) + + results.put((imgy, result)) + + +def affine_blend_impl( + add_color: Tuple[int, int, int, int], + mult_color: Color, + src_color: Tuple[int, int, int, int], + dest_color: Tuple[int, int, int, int], + blendfunc: int, ) -> Tuple[int, int, int, int]: - # Calculate what texture pixel data goes here. - texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin) - texx, texy = texloc.as_tuple() - - # If we're out of bounds, don't update. - if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: - return dest_color - - # Blend it. - texoff = texx + (texy * texwidth) - if blendfunc == 3: - return blend_multiply(dest_color, texmap[texoff], mult_color, add_color) + return blend_multiply(dest_color, src_color, mult_color, add_color) # TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this # in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1. # TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not @@ -242,10 +333,10 @@ def affine_blend_point( # TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be # Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As). elif blendfunc == 8: - return blend_addition(dest_color, texmap[texoff], mult_color, add_color) + return blend_addition(dest_color, src_color, mult_color, add_color) elif blendfunc == 9 or blendfunc == 70: - return blend_subtraction(dest_color, texmap[texoff], mult_color, add_color) + return blend_subtraction(dest_color, src_color, mult_color, add_color) # TODO: blend mode 75, which is not in the SWF spec and appears to have the equation # Src * (1 - Dst) + Dst * (1 - Src). else: - return blend_normal(dest_color, texmap[texoff], mult_color, add_color) + return blend_normal(dest_color, src_color, mult_color, add_color)