diff --git a/bemani/format/afp/blend.py b/bemani/format/afp/blend.py index 35256fa..9694068 100644 --- a/bemani/format/afp/blend.py +++ b/bemani/format/afp/blend.py @@ -6,203 +6,279 @@ from typing import Any, List, Sequence, Tuple from .types.generic import Color, Matrix, Point -def clamp(color: float) -> int: - return min(max(0, round(color)), 255) +# If we compiled the faster cython code, we can use it instead! +try: + from .blendalt import affine_composite +except ImportError: + def clamp(color: float) -> int: + return min(max(0, round(color)), 255) + def blend_normal( + # RGBA color tuple representing what's already at the dest. + dest: Sequence[int], + # RGBA color tuple representing the source we want to blend to the dest. + src: Sequence[int], + ) -> Sequence[int]: + # "Normal" blend mode, which is just alpha blending. Various games use the DX + # equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since + # we are blitting onto a destination that could have transparency. Once we are + # done, we divide out the premultiplied Ad in order to put the pixels back to + # their full blended values since we are not setting the destination alpha to 1.0. + # This enables partial transparent backgrounds to work properly. -def blend_normal( - # RGBA color tuple representing what's already at the dest. - dest: Sequence[int], - # RGBA color tuple representing the source we want to blend to the dest. - src: Sequence[int], - # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color. - mult_color: Color, - # A RGBA color tuple where all values are 0-255, used to calculate the final color. - add_color: Tuple[int, int, int, int], -) -> Sequence[int]: - # "Normal" blend mode, which is just alpha blending. Various games use the DX - # equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since - # we are blitting onto a destination that could have transparency. 
Once we are - # done, we divide out the premultiplied Ad in order to put the pixes back to - # their full blended values since we are not setting the destination alpha to 1.0. - # This enables partial transparent backgrounds to work properly. + # Short circuit for speed. + if src[3] == 0: + return dest + if src[3] == 255: + return src - # Calculate multiplicative and additive colors against the source. - src = ( - clamp((src[0] * mult_color.r) + add_color[0]), - clamp((src[1] * mult_color.g) + add_color[1]), - clamp((src[2] * mult_color.b) + add_color[2]), - clamp((src[3] * mult_color.a) + add_color[3]), - ) + # Calculate alpha blending. + srcpercent = src[3] / 255.0 + destpercent = dest[3] / 255.0 + srcremaineder = 1.0 - srcpercent + new_alpha = (srcpercent + destpercent * srcremaineder) + return ( + clamp(((dest[0] * destpercent * srcremaineder) + (src[0] * srcpercent)) / new_alpha), + clamp(((dest[1] * destpercent * srcremaineder) + (src[1] * srcpercent)) / new_alpha), + clamp(((dest[2] * destpercent * srcremaineder) + (src[2] * srcpercent)) / new_alpha), + clamp(255 * new_alpha) + ) - # Short circuit for speed. - if src[3] == 0: - return dest - if src[3] == 255: - return src + def blend_addition( + # RGBA color tuple representing what's already at the dest. + dest: Sequence[int], + # RGBA color tuple representing the source we want to blend to the dest. + src: Sequence[int], + ) -> Sequence[int]: + # "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX + # equation Src * As + Dst * 1. It appears jubeat does not premultiply the source + # by its alpha component. - # Calculate alpha blending. 
- srcpercent = src[3] / 255.0 - destpercent = dest[3] / 255.0 - srcremaineder = 1.0 - srcpercent - new_alpha = (srcpercent + destpercent * srcremaineder) - return ( - clamp(((dest[0] * destpercent * srcremaineder) + (src[0] * srcpercent)) / new_alpha), - clamp(((dest[1] * destpercent * srcremaineder) + (src[1] * srcpercent)) / new_alpha), - clamp(((dest[2] * destpercent * srcremaineder) + (src[2] * srcpercent)) / new_alpha), - clamp(255 * new_alpha) - ) + # Short circuit for speed. + if src[3] == 0: + return dest + # Calculate final color blending. + srcpercent = src[3] / 255.0 + return ( + clamp(dest[0] + (src[0] * srcpercent)), + clamp(dest[1] + (src[1] * srcpercent)), + clamp(dest[2] + (src[2] * srcpercent)), + dest[3], + ) -def blend_addition( - # RGBA color tuple representing what's already at the dest. - dest: Sequence[int], - # RGBA color tuple representing the source we want to blend to the dest. - src: Sequence[int], - # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color. - mult_color: Color, - # A RGBA color tuple where all values are 0-255, used to calculate the final color. - add_color: Tuple[int, int, int, int], -) -> Sequence[int]: - # "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX - # equation Src * As + Dst * 1. It appears jubeat does not premultiply the source - # by its alpha component. + def blend_subtraction( + # RGBA color tuple representing what's already at the dest. + dest: Sequence[int], + # RGBA color tuple representing the source we want to blend to the dest. + src: Sequence[int], + ) -> Sequence[int]: + # "Subtraction" blend mode, used for darkening an image. Various games use the DX + # equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source + # by its alpha component much like the "additive" blend above. - # Calculate multiplicative and additive colors against the source. 
- src = ( - clamp((src[0] * mult_color.r) + add_color[0]), - clamp((src[1] * mult_color.g) + add_color[1]), - clamp((src[2] * mult_color.b) + add_color[2]), - clamp((src[3] * mult_color.a) + add_color[3]), - ) + # Short circuit for speed. + if src[3] == 0: + return dest - # Short circuit for speed. - if src[3] == 0: - return dest + # Calculate final color blending. + srcpercent = src[3] / 255.0 + return ( + clamp(dest[0] - (src[0] * srcpercent)), + clamp(dest[1] - (src[1] * srcpercent)), + clamp(dest[2] - (src[2] * srcpercent)), + dest[3], + ) - # Calculate final color blending. - srcpercent = src[3] / 255.0 - return ( - clamp(dest[0] + (src[0] * srcpercent)), - clamp(dest[1] + (src[1] * srcpercent)), - clamp(dest[2] + (src[2] * srcpercent)), - dest[3], - ) + def blend_multiply( + # RGBA color tuple representing what's already at the dest. + dest: Sequence[int], + # RGBA color tuple representing the source we want to blend to the dest. + src: Sequence[int], + ) -> Sequence[int]: + # "Multiply" blend mode, used for darkening an image. Various games use the DX + # equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula + # Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the + # source alpha is always 255. + # Calculate final color blending. + return ( + clamp(255 * ((dest[0] / 255.0) * (src[0] / 255.0))), + clamp(255 * ((dest[1] / 255.0) * (src[1] / 255.0))), + clamp(255 * ((dest[2] / 255.0) * (src[2] / 255.0))), + dest[3], + ) -def blend_subtraction( - # RGBA color tuple representing what's already at the dest. - dest: Sequence[int], - # RGBA color tuple representing the source we want to blend to the dest. - src: Sequence[int], - # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color. - mult_color: Color, - # A RGBA color tuple where all values are 0-255, used to calculate the final color. 
- add_color: Tuple[int, int, int, int], -) -> Sequence[int]: - # "Subtraction" blend mode, used for darkening an image. Various games use the DX - # equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source - # by its alpha component much like the "additive" blend above.. + def affine_composite( + img: Image.Image, + add_color: Tuple[int, int, int, int], + mult_color: Color, + transform: Matrix, + origin: Point, + blendfunc: int, + texture: Image.Image, + single_threaded: bool = False, + ) -> Image.Image: + # Calculate the inverse so we can map canvas space back to texture space. + try: + inverse = transform.inverse() + except ZeroDivisionError: + # If this happens, that means one of the scaling factors was zero, making + # this object invisible. We can ignore this since the object should not + # be drawn. + print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!") + return img - # Calculate multiplicative and additive colors against the source. - src = ( - clamp((src[0] * mult_color.r) + add_color[0]), - clamp((src[1] * mult_color.g) + add_color[1]), - clamp((src[2] * mult_color.b) + add_color[2]), - clamp((src[3] * mult_color.a) + add_color[3]), - ) + # Warn if we have an unsupported blend. + if blendfunc not in {0, 2, 3, 8, 9, 70}: + print(f"WARNING: Unsupported blend {blendfunc}") + return img - # Short circuit for speed. - if src[3] == 0: - return dest + # These are calculated properties and caching them outside of the loop + # speeds things up a bit. + imgwidth = img.width + imgheight = img.height + texwidth = texture.width + texheight = texture.height - # Calculate final color blending. - srcpercent = src[3] / 255.0 - return ( - clamp(dest[0] - (src[0] * srcpercent)), - clamp(dest[1] - (src[1] * srcpercent)), - clamp(dest[2] - (src[2] * srcpercent)), - dest[3], - ) + # Calculate the maximum range of update this texture can possibly reside in. 
+ pix1 = transform.multiply_point(Point.identity().subtract(origin)) + pix2 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, 0))) + pix3 = transform.multiply_point(Point.identity().subtract(origin).add(Point(0, texheight))) + pix4 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, texheight))) + # Map this to the rectangle we need to sweep in the rendering image. + minx = max(int(min(pix1.x, pix2.x, pix3.x, pix4.x)), 0) + maxx = min(int(max(pix1.x, pix2.x, pix3.x, pix4.x)) + 1, imgwidth) + miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0) + maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight) -def blend_multiply( - # RGBA color tuple representing what's already at the dest. - dest: Sequence[int], - # RGBA color tuple representing the source we want to blend to the dest. - src: Sequence[int], - # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color. - mult_color: Color, - # A RGBA color tuple where all values are 0-255, used to calculate the final color. - add_color: Tuple[int, int, int, int], -) -> Sequence[int]: - # "Multiply" blend mode, used for darkening an image. Various games use the DX - # equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula - # Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the - # source alpha is always 255. + if maxx <= 0 or maxy <= 0: + # This image is entirely off the screen! + return img - # Calculate multiplicative and additive colors against the source. - src = ( - clamp((src[0] * mult_color.r) + add_color[0]), - clamp((src[1] * mult_color.g) + add_color[1]), - clamp((src[2] * mult_color.b) + add_color[2]), - clamp((src[3] * mult_color.a) + add_color[3]), - ) + cores = multiprocessing.cpu_count() + if single_threaded or cores < 2: + # Get the data in an easier to manipulate and faster to update fashion. 
+ imgmap = list(img.getdata()) + texmap = list(texture.getdata()) - # Calculate final color blending. - return ( - clamp(255 * ((dest[0] / 255.0) * (src[0] / 255.0))), - clamp(255 * ((dest[1] / 255.0) * (src[1] / 255.0))), - clamp(255 * ((dest[2] / 255.0) * (src[2] / 255.0))), - dest[3], - ) + # We don't have enough CPU cores to bother multiprocessing. + for imgy in range(miny, maxy): + for imgx in range(minx, maxx): + # Determine offset + imgoff = imgx + (imgy * imgwidth) + # Calculate what texture pixel data goes here. + texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin) + texx, texy = texloc.as_tuple() -def affine_composite( - img: Image.Image, - add_color: Tuple[int, int, int, int], - mult_color: Color, - transform: Matrix, - inverse: Matrix, - origin: Point, - blendfunc: int, - texture: Image.Image, - single_threaded: bool = False, -) -> Image.Image: - # Warn if we have an unsupported blend. - if blendfunc not in {0, 2, 3, 8, 9, 70}: - print(f"WARNING: Unsupported blend {blendfunc}") + # If we're out of bounds, don't update. + if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: + continue - # These are calculated properties and caching them outside of the loop - # speeds things up a bit. - imgwidth = img.width - imgheight = img.height - texwidth = texture.width - texheight = texture.height + # Blend it. + texoff = texx + (texy * texwidth) + imgmap[imgoff] = blend_point(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc) - # Calculate the maximum range of update this texture can possibly reside in. 
- pix1 = transform.multiply_point(Point.identity().subtract(origin)) - pix2 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, 0))) - pix3 = transform.multiply_point(Point.identity().subtract(origin).add(Point(0, texheight))) - pix4 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, texheight))) + img.putdata(imgmap) + else: + imgbytes = img.tobytes('raw', 'RGBA') + texbytes = texture.tobytes('raw', 'RGBA') - # Map this to the rectangle we need to sweep in the rendering image. - minx = max(int(min(pix1.x, pix2.x, pix3.x, pix4.x)), 0) - maxx = min(int(max(pix1.x, pix2.x, pix3.x, pix4.x)) + 1, imgwidth) - miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0) - maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight) + # Let's spread the load across multiple processors. + procs: List[multiprocessing.Process] = [] + work: multiprocessing.Queue = multiprocessing.Queue() + results: multiprocessing.Queue = multiprocessing.Queue() + expected: int = 0 + interrupted: bool = False - cores = multiprocessing.cpu_count() - if single_threaded or cores < 2: - # Get the data in an easier to manipulate and faster to update fashion. - imgmap = list(img.getdata()) - texmap = list(texture.getdata()) + def ctrlc(sig: Any, frame: Any) -> None: + nonlocal interrupted + interrupted = True - # We don't have enough CPU cores to bother multiprocessing. 
- for imgy in range(miny, maxy): - for imgx in range(minx, maxx): + original_handler = signal.getsignal(signal.SIGINT) + signal.signal(signal.SIGINT, ctrlc) + + for _ in range(cores): + proc = multiprocessing.Process( + target=pixel_renderer, + args=( + work, + results, + minx, + maxx, + imgwidth, + texwidth, + texheight, + inverse, + origin, + add_color, + mult_color, + blendfunc, + imgbytes, + texbytes, + ), + ) + procs.append(proc) + proc.start() + + for imgy in range(miny, maxy): + work.put(imgy) + expected += 1 + + lines: List[bytes] = [ + imgbytes[x:(x + (imgwidth * 4))] + for x in range( + 0, + imgwidth * imgheight * 4, + imgwidth * 4, + ) + ] + for _ in range(expected): + imgy, result = results.get() + lines[imgy] = result + + for proc in procs: + work.put(None) + for proc in procs: + proc.join() + + signal.signal(signal.SIGINT, original_handler) + if interrupted: + raise KeyboardInterrupt() + + img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines)) + return img + + def pixel_renderer( + work: multiprocessing.Queue, + results: multiprocessing.Queue, + minx: int, + maxx: int, + imgwidth: int, + texwidth: int, + texheight: int, + inverse: Matrix, + origin: Point, + add_color: Tuple[int, int, int, int], + mult_color: Color, + blendfunc: int, + imgbytes: bytes, + texbytes: bytes, + ) -> None: + while True: + imgy = work.get() + if imgy is None: + return + + result: List[Sequence[int]] = [] + for imgx in range(imgwidth): # Determine offset imgoff = imgx + (imgy * imgwidth) + if imgx < minx or imgx >= maxx: + result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)]) + continue # Calculate what texture pixel data goes here. texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin) @@ -210,155 +286,50 @@ def affine_composite( # If we're out of bounds, don't update. if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: + result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)]) continue # Blend it. 
texoff = texx + (texy * texwidth) - imgmap[imgoff] = affine_blend_impl(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc) + result.append(blend_point(add_color, mult_color, texbytes[(texoff * 4):((texoff + 1) * 4)], imgbytes[(imgoff * 4):((imgoff + 1) * 4)], blendfunc)) - img.putdata(imgmap) - else: - imgbytes = img.tobytes('raw', 'RGBA') - texbytes = texture.tobytes('raw', 'RGBA') + linebytes = bytes([channel for pixel in result for channel in pixel]) + results.put((imgy, linebytes)) - # Let's spread the load across multiple processors. - procs: List[multiprocessing.Process] = [] - work: multiprocessing.Queue = multiprocessing.Queue() - results: multiprocessing.Queue = multiprocessing.Queue() - expected: int = 0 - interrupted: bool = False + def blend_point( + add_color: Tuple[int, int, int, int], + mult_color: Color, + # This should be a sequence of exactly 4 values, either bytes or a tuple. + src_color: Sequence[int], + # This should be a sequence of exactly 4 values, either bytes or a tuple. + dest_color: Sequence[int], + blendfunc: int, + ) -> Sequence[int]: + # Calculate multiplicative and additive colors against the source. 
+ src_color = ( + clamp((src_color[0] * mult_color.r) + add_color[0]), + clamp((src_color[1] * mult_color.g) + add_color[1]), + clamp((src_color[2] * mult_color.b) + add_color[2]), + clamp((src_color[3] * mult_color.a) + add_color[3]), + ) - def ctrlc(sig: Any, frame: Any) -> None: - nonlocal interrupted - interrupted = True - - original_handler = signal.getsignal(signal.SIGINT) - signal.signal(signal.SIGINT, ctrlc) - - for _ in range(cores): - proc = multiprocessing.Process( - target=pixel_renderer, - args=( - work, - results, - minx, - maxx, - imgwidth, - texwidth, - texheight, - inverse, - origin, - add_color, - mult_color, - blendfunc, - imgbytes, - texbytes, - ), - ) - procs.append(proc) - proc.start() - - for imgy in range(miny, maxy): - work.put(imgy) - expected += 1 - - lines: List[bytes] = [ - imgbytes[x:(x + (imgwidth * 4))] - for x in range( - 0, - imgwidth * imgheight * 4, - imgwidth * 4, - ) - ] - for _ in range(expected): - imgy, result = results.get() - lines[imgy] = result - - for proc in procs: - work.put(None) - for proc in procs: - proc.join() - - signal.signal(signal.SIGINT, original_handler) - if interrupted: - raise KeyboardInterrupt() - - img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines)) - return img - - -def pixel_renderer( - work: multiprocessing.Queue, - results: multiprocessing.Queue, - minx: int, - maxx: int, - imgwidth: int, - texwidth: int, - texheight: int, - inverse: Matrix, - origin: Point, - add_color: Tuple[int, int, int, int], - mult_color: Color, - blendfunc: int, - imgbytes: bytes, - texbytes: bytes, -) -> None: - while True: - imgy = work.get() - if imgy is None: - return - - result: List[Sequence[int]] = [] - for imgx in range(imgwidth): - # Determine offset - imgoff = imgx + (imgy * imgwidth) - if imgx < minx or imgx >= maxx: - result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)]) - continue - - # Calculate what texture pixel data goes here. 
- texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin) - texx, texy = texloc.as_tuple() - - # If we're out of bounds, don't update. - if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: - result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)]) - continue - - # Blend it. - texoff = texx + (texy * texwidth) - result.append(affine_blend_impl(add_color, mult_color, texbytes[(texoff * 4):((texoff + 1) * 4)], imgbytes[(imgoff * 4):((imgoff + 1) * 4)], blendfunc)) - - linebytes = bytes([channel for pixel in result for channel in pixel]) - results.put((imgy, linebytes)) - - -def affine_blend_impl( - add_color: Tuple[int, int, int, int], - mult_color: Color, - # This should be a sequence of exactly 4 values, either bytes or a tuple. - src_color: Sequence[int], - # This should be a sequence of exactly 4 values, either bytes or a tuple. - dest_color: Sequence[int], - blendfunc: int, -) -> Sequence[int]: - if blendfunc == 3: - return blend_multiply(dest_color, src_color, mult_color, add_color) - # TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this - # in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1. - # TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not - # premultiply by alpha, but the GL/DX equation is max(Src * As, Dst * 1). - # TODO: blend mode 6, which is "darken" blending according to SWF references. Jubeat does not - # premultiply by alpha, but the GL/DX equation is min(Src * As, Dst * 1). - # TODO: blend mode 10, which is "invert" according to SWF references. The only game I could find - # that implemented this had equation Src * (1 - Dst) + Dst * (1 - As). - # TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be - # Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As). 
- elif blendfunc == 8: - return blend_addition(dest_color, src_color, mult_color, add_color) - elif blendfunc == 9 or blendfunc == 70: - return blend_subtraction(dest_color, src_color, mult_color, add_color) - # TODO: blend mode 75, which is not in the SWF spec and appears to have the equation - # Src * (1 - Dst) + Dst * (1 - Src). - else: - return blend_normal(dest_color, src_color, mult_color, add_color) + if blendfunc == 3: + return blend_multiply(dest_color, src_color) + # TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this + # in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1. + # TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not + # premultiply by alpha, but the GL/DX equation is max(Src * As, Dst * 1). + # TODO: blend mode 6, which is "darken" blending according to SWF references. Jubeat does not + # premultiply by alpha, but the GL/DX equation is min(Src * As, Dst * 1). + # TODO: blend mode 10, which is "invert" according to SWF references. The only game I could find + # that implemented this had equation Src * (1 - Dst) + Dst * (1 - As). + # TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be + # Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As). + elif blendfunc == 8: + return blend_addition(dest_color, src_color) + elif blendfunc == 9 or blendfunc == 70: + return blend_subtraction(dest_color, src_color) + # TODO: blend mode 75, which is not in the SWF spec and appears to have the equation + # Src * (1 - Dst) + Dst * (1 - Src). 
+ else: + return blend_normal(dest_color, src_color) diff --git a/bemani/format/afp/blendalt.pyi b/bemani/format/afp/blendalt.pyi new file mode 100644 index 0000000..292d88c --- /dev/null +++ b/bemani/format/afp/blendalt.pyi @@ -0,0 +1,16 @@ +from PIL import Image # type: ignore +from typing import Tuple + +from .types.generic import Color, Matrix, Point + +def affine_composite( + img: Image.Image, + add_color: Tuple[int, int, int, int], + mult_color: Color, + transform: Matrix, + origin: Point, + blendfunc: int, + texture: Image.Image, + single_threaded: bool = False, +) -> Image.Image: + ... diff --git a/bemani/format/afp/blendalt.pyx b/bemani/format/afp/blendalt.pyx new file mode 100644 index 0000000..ad4afb2 --- /dev/null +++ b/bemani/format/afp/blendalt.pyx @@ -0,0 +1,129 @@ +from PIL import Image # type: ignore +from typing import Tuple + +from .types.generic import Color, Matrix, Point + +cdef extern struct intcolor_t: + unsigned char r; + unsigned char g; + unsigned char b; + unsigned char a; + +cdef extern struct floatcolor_t: + float r; + float g; + float b; + float a; + +cdef extern struct matrix_t: + float a; + float b; + float c; + float d; + float tx; + float ty; + +cdef extern struct point_t: + float x; + float y; + +cdef extern int affine_composite_fast( + unsigned char *imgdata, + unsigned int imgwidth, + unsigned int imgheight, + unsigned int minx, + unsigned int maxx, + unsigned int miny, + unsigned int maxy, + intcolor_t add_color, + floatcolor_t mult_color, + matrix_t inverse, + point_t origin, + int blendfunc, + unsigned char *texdata, + unsigned int texwidth, + unsigned int texheight, + int single_threaded +) + +def affine_composite( + img: Image.Image, + add_color: Tuple[int, int, int, int], + mult_color: Color, + transform: Matrix, + origin: Point, + blendfunc: int, + texture: Image.Image, + single_threaded: bool = False, +) -> Image.Image: + # Calculate the inverse so we can map canvas space back to texture space. 
+ try: + inverse = transform.inverse() + except ZeroDivisionError: + # If this happens, that means one of the scaling factors was zero, making + # this object invisible. We can ignore this since the object should not + # be drawn. + print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!") + return img + + if blendfunc not in {0, 2, 3, 8, 9, 70}: + print(f"WARNING: Unsupported blend {blendfunc}") + return img + + # These are calculated properties and caching them outside of the loop + # speeds things up a bit. + imgwidth = img.width + imgheight = img.height + texwidth = texture.width + texheight = texture.height + + # Calculate the maximum range of update this texture can possibly reside in. + pix1 = transform.multiply_point(Point.identity().subtract(origin)) + pix2 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, 0))) + pix3 = transform.multiply_point(Point.identity().subtract(origin).add(Point(0, texheight))) + pix4 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, texheight))) + + # Map this to the rectangle we need to sweep in the rendering image. + minx = max(int(min(pix1.x, pix2.x, pix3.x, pix4.x)), 0) + maxx = min(int(max(pix1.x, pix2.x, pix3.x, pix4.x)) + 1, imgwidth) + miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0) + maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight) + + if maxx <= 0 or maxy <= 0: + # This image is entirely off the screen! + return img + + # Grab the raw image data. + imgbytes = img.tobytes('raw', 'RGBA') + texbytes = texture.tobytes('raw', 'RGBA') + + # Convert classes to C structs. 
+ cdef intcolor_t c_addcolor = intcolor_t(r=add_color[0], g=add_color[1], b=add_color[2], a=add_color[3]) + cdef floatcolor_t c_multcolor = floatcolor_t(r=mult_color.r, g=mult_color.g, b=mult_color.b, a=mult_color.a) + cdef matrix_t c_inverse = matrix_t(a=inverse.a, b=inverse.b, c=inverse.c, d=inverse.d, tx=inverse.tx, ty=inverse.ty) + cdef point_t c_origin = point_t(x=origin.x, y=origin.y) + + # Call the C++ function. + errors = affine_composite_fast( + imgbytes, + imgwidth, + imgheight, + minx, + maxx, + miny, + maxy, + c_addcolor, + c_multcolor, + c_inverse, + c_origin, + blendfunc, + texbytes, + texwidth, + texheight, + single_threaded, + ) + if errors != 0: + raise Exception("Error raised in C++!") + + # We blitted in-place, return that. + return Image.frombytes('RGBA', (imgwidth, imgheight), imgbytes) diff --git a/bemani/format/afp/blendaltimpl.cpp b/bemani/format/afp/blendaltimpl.cpp new file mode 100644 index 0000000..9a4c8c9 --- /dev/null +++ b/bemani/format/afp/blendaltimpl.cpp @@ -0,0 +1,232 @@ +#include +#include + +extern "C" +{ + typedef struct intcolor { + unsigned char r; + unsigned char g; + unsigned char b; + unsigned char a; + } intcolor_t; + + typedef struct floatcolor { + float r; + float g; + float b; + float a; + } floatcolor_t; + + typedef struct point { + float x; + float y; + + struct point add(struct point other) { + return (struct point){ + x + other.x, + y + other.y, + }; + }; + } point_t; + + typedef struct matrix { + float a; + float b; + float c; + float d; + float tx; + float ty; + + point_t multiply_point(point_t point) { + return (point_t){ + (a * point.x) + (c * point.y) + tx, + (b * point.x) + (d * point.y) + ty, + }; + } + } matrix_t; + + inline unsigned char clamp(float color) { + return fmin(fmax(0.0, roundf(color)), 255.0); + } + + intcolor_t blend_normal( + intcolor_t dest, + intcolor_t src + ) { + // "Normal" blend mode, which is just alpha blending. Various games use the DX + // equation Src * As + Dst * (1 - As). 
We premultiply Dst by Ad as well, since + // we are blitting onto a destination that could have transparency. Once we are + // done, we divide out the premultiplied Ad in order to put the pixels back to + // their full blended values since we are not setting the destination alpha to 1.0. + // This enables partial transparent backgrounds to work properly. + + // Short circuit for speed. + if (src.a == 0) { + return dest; + } + if (src.a == 255) { + return src; + } + + // Calculate alpha blending. + float srcpercent = src.a / 255.0; + float destpercent = dest.a / 255.0; + float srcremaineder = 1.0 - srcpercent; + float new_alpha = (srcpercent + destpercent * srcremaineder); + return (intcolor_t){ + clamp(((dest.r * destpercent * srcremaineder) + (src.r * srcpercent)) / new_alpha), + clamp(((dest.g * destpercent * srcremaineder) + (src.g * srcpercent)) / new_alpha), + clamp(((dest.b * destpercent * srcremaineder) + (src.b * srcpercent)) / new_alpha), + clamp(255 * new_alpha) + }; + } + + intcolor_t blend_addition( + intcolor_t dest, + intcolor_t src + ) { + // "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX + // equation Src * As + Dst * 1. It appears jubeat does not premultiply the source + // by its alpha component. + + // Short circuit for speed. + if (src.a == 0) { + return dest; + } + + // Calculate final color blending. + float srcpercent = src.a / 255.0; + return (intcolor_t){ + clamp(dest.r + (src.r * srcpercent)), + clamp(dest.g + (src.g * srcpercent)), + clamp(dest.b + (src.b * srcpercent)), + dest.a, + }; + } + + intcolor_t blend_subtraction( + intcolor_t dest, + intcolor_t src + ) { + // "Subtraction" blend mode, used for darkening an image. Various games use the DX + // equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source + // by its alpha component much like the "additive" blend above. + + // Short circuit for speed. + if (src.a == 0) { + return dest; + } + + // Calculate final color blending. 
+ float srcpercent = src.a / 255.0; + return (intcolor_t){ + clamp(dest.r - (src.r * srcpercent)), + clamp(dest.g - (src.g * srcpercent)), + clamp(dest.b - (src.b * srcpercent)), + dest.a, + }; + } + + intcolor_t blend_multiply( + intcolor_t dest, + intcolor_t src + ) { + // "Multiply" blend mode, used for darkening an image. Various games use the DX + // equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula + // Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the + // source alpha is always 255. + + // Calculate final color blending. + return (intcolor_t){ + clamp(255 * ((dest.r / 255.0) * (src.r / 255.0))), + clamp(255 * ((dest.g / 255.0) * (src.g / 255.0))), + clamp(255 * ((dest.b / 255.0) * (src.b / 255.0))), + dest.a, + }; + } + + intcolor_t blend_point( + intcolor_t add_color, + floatcolor_t mult_color, + intcolor_t src_color, + intcolor_t dest_color, + int blendfunc + ) { + // Calculate multiplicative and additive colors against the source. + src_color = (intcolor_t){ + clamp((src_color.r * mult_color.r) + add_color.r), + clamp((src_color.g * mult_color.g) + add_color.g), + clamp((src_color.b * mult_color.b) + add_color.b), + clamp((src_color.a * mult_color.a) + add_color.a), + }; + + if (blendfunc == 3) { + return blend_multiply(dest_color, src_color); + } + // TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this + // in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1. + // TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not + // premultiply by alpha, but the GL/DX equation is max(Src * As, Dst * 1). + // TODO: blend mode 6, which is "darken" blending according to SWF references. Jubeat does not + // premultiply by alpha, but the GL/DX equation is min(Src * As, Dst * 1). + // TODO: blend mode 10, which is "invert" according to SWF references. 
The only game I could find + // that implemented this had equation Src * (1 - Dst) + Dst * (1 - As). + // TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be + // Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As). + if (blendfunc == 8) { + return blend_addition(dest_color, src_color); + } + if (blendfunc == 9 || blendfunc == 70) { + return blend_subtraction(dest_color, src_color); + } + // TODO: blend mode 75, which is not in the SWF spec and appears to have the equation + // Src * (1 - Dst) + Dst * (1 - Src). + return blend_normal(dest_color, src_color); + } + + int affine_composite_fast( + unsigned char *imgbytes, + unsigned int imgwidth, + unsigned int imgheight, + unsigned int minx, + unsigned int maxx, + unsigned int miny, + unsigned int maxy, + intcolor_t add_color, + floatcolor_t mult_color, + matrix_t inverse, + point_t origin, + int blendfunc, + unsigned char *texbytes, + unsigned int texwidth, + unsigned int texheight, + int single_threaded + ) { + // Cast to a usable type. + intcolor_t *imgdata = (intcolor_t *)imgbytes; + intcolor_t *texdata = (intcolor_t *)texbytes; + + for (unsigned int imgy = miny; imgy < maxy; imgy++) { + for (unsigned int imgx = minx; imgx < maxx; imgx++) { + // Determine offset. + unsigned int imgoff = imgx + (imgy * imgwidth); + + // Calculate what texture pixel data goes here. + point_t texloc = inverse.multiply_point((point_t){(float)imgx, (float)imgy}).add(origin); + int texx = roundf(texloc.x); + int texy = roundf(texloc.y); + + // If we're out of bounds, don't update. + if (texx < 0 or texy < 0 or texx >= (int)texwidth or texy >= (int)texheight) { + continue; + } + + // Blend it. 
+ unsigned int texoff = texx + (texy * texwidth); + imgdata[imgoff] = blend_point(add_color, mult_color, texdata[texoff], imgdata[imgoff], blendfunc); + } + } + + return 0; + } +} diff --git a/bemani/format/afp/render.py b/bemani/format/afp/render.py index f139411..37b7b94 100644 --- a/bemani/format/afp/render.py +++ b/bemani/format/afp/render.py @@ -374,16 +374,6 @@ class AFPRenderer(VerboseOutput): # Compute the affine transformation matrix for this object. transform = parent_transform.multiply(renderable.transform) - # Calculate the inverse so we can map canvas space back to texture space. - try: - inverse = transform.inverse() - except ZeroDivisionError: - # If this happens, that means one of the scaling factors was zero, making - # this object invisible. We can ignore this since the object should not - # be drawn. - print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!") - return img - # Render individual shapes if this is a sprite. if isinstance(renderable, PlacedClip): # This is a sprite placement reference. @@ -458,7 +448,7 @@ class AFPRenderer(VerboseOutput): img.alpha_composite(texture, cutin.as_tuple(), cutoff.as_tuple()) else: # We can't, so do the slow render that's correct. - img = affine_composite(img, add_color, mult_color, transform, inverse, origin, blend, texture, single_threaded=self.__single_threaded) + img = affine_composite(img, add_color, mult_color, transform, origin, blend, texture, single_threaded=self.__single_threaded) else: raise Exception(f"Unknown placed object type to render {renderable}!") diff --git a/setup.py b/setup.py index ae7591d..a0c1158 100644 --- a/setup.py +++ b/setup.py @@ -123,6 +123,13 @@ setup( "bemani/format/afp/blend.py", ] ), + Extension( + "bemani.format.afp.blendalt", + [ + "bemani/format/afp/blendalt.pyx", + "bemani/format/afp/blendaltimpl.cpp", + ] + ), Extension( "bemani.format.afp.types.generic", [